---
title: "Bowling Alone or Masking Together? The Role of Social Capital in Excess Death Rates from COVID19"
subtitle: "Replication Code"
date: "March 15, 2021"
author: "Timothy Fraser, Daniel P. Aldrich, and Courtney Page-Tan"
output: html_notebook
---

This replication code file generates our dataset for estimating the effects of social capital on excess death rates during the COVID-19 pandemic. Below, we summarize our basic approach.

- First, we collected excess death rates among counties affected by the COVID-19 pandemic, collected at several time-periods, creating an unbalanced panel dataset.

- Second, we identified a set of counties as similar as possible, based on the consistently available counties, except that some had high social capital and some had low social capital.

We test the effects of three kinds of social capital indices, including 1. Kyne & Aldrich's bonding, bridging, and linking social capital indices, 2. The Penn State indices, and 3. the US Senate Committee's social capital indices.



# 0. Packages

```{r, message = FALSE, warning = FALSE}
# Install the following packages to perform our replication code
#install.packages(c("tidyverse", "MatchIt", "cem", "Zelig", "ggpubr", "texreg", "MESS"))

# Load packages
library(tidyverse) # for data manipulation
library(MatchIt) # for matching function wrappers
library(cem) # for coarsened exact matching
library(Zelig) # for statistical simulation
library(ggpubr) # for binding plots
#library(texreg) # great plots
library(MESS) # for gee diagnostics

# For data manipulation
library(tidyverse)
# For multilevel modeling
library(gee)
library(geepack)
library(Zelig)
library(plm)
library(lme4)
library(lmerTest)
# For diagnostics
library(performance)
library(MESS)

```


# 1. Build Dataset

## 1.1 Excess Death Rate

To calculate excess death rates, we are going to first access as many deaths per month in counties in 2020 in counties where any COVID outbreaks were detected (that's a CDC constraint on the data; they release only total deaths for counties where there are at least 10 cases of COVID-19 reported). This data was collected for several timestamps between Februrary 1 and September 26, 2020. CDC does not release anything more detailed than that, in order to ensure data privacy for residents.


```{r}
# See files in our excess death folder
dir("excess_deaths")

# Let's write a quick function
import = function(filename){
  
  filename %>%
    read_csv() %>%
    select(
      date = `Last week`,
      fips = `FIPS County Code`,
      deaths = `Deaths from All Causes`,
      deaths_covid = `Deaths involving COVID-19`) %>%
    # Occasionally the date of the study period isn't included,
    # but they were all released together, so we know it is the same date
    # so we fill it in
    fill(date, .direction = "downup") %>%
    mutate(fips = fips %>% str_pad(5, "left", "0"),
           date = lubridate::mdy(date))  %>%
    return()
}

# Import total deaths per county in 2020
now <- dir("excess_deaths/over_time", full.names = TRUE) %>%
  lapply(import) %>%
  bind_rows() %>%
  # Sometimes the regular reports were just identical and were not updated.
  # We grab the distinct reports
  distinct()
```

Next, let's import monthly deaths per county in each year over time.

```{r}
# First, we downloaded from CDC Wonder's Underlying Causes of Death dataset 1998-2019 the data for the years 2015, 2016, 2017, 2018, and 2019.
data.frame(file = dir("excess_deaths/prior_deaths/original_data/", full.names = TRUE)) %>%
  # For each file downloaded
  split(.$file) %>%
  # Read it in
  map_dfr(~read_delim(.$file, delim = "\t") %>% 
            # Grab just these variables
            dplyr::select(fips = `County Code`, date = `Month Code`, deaths = Deaths) %>%
            # Make the date
            mutate(date = lubridate::make_date(year = str_sub(date, 1,4), month = str_sub(date, 6,7))) %>%
            # Make the county code
            mutate(fips = str_pad(fips, width = 5, side = "left", pad = "0"))) %>%
  # Some datasets involved multiple date-ranges.
  # We only want deaths per county from January to October each year
  filter(str_sub(date, 6, 7) %in% c("01","02","03","04","05","06","07","08","09","10")) %>%
  # And a rrange nicely
  arrange(date, fips) %>%
  write_csv("excess_deaths/prior_deaths/prior_deaths.csv")

# Import prior deaths each month per county 2015-2019
# (This conveniently predates estimates of how early COVID may have hit the US, eg. November 2019, according to some accounts, or January-February 2021, according to others. )
then <- read_csv("excess_deaths/prior_deaths/prior_deaths.csv") %>%
  # Let's adjust the data.frame so that each row is assigned...
  mutate(
    # They year of the observation
    year = str_sub(date, 1,4),
    # And the date of the observation, but all affixed to the year 2020,
    # which will be used to join the data later.
    date = lubridate::make_date(
      year = 2020, month = str_sub(date, 6,7),
      day = str_sub(date, 9, 10))) %>%
  # Now pivot wider into a wide matrix, where columns are years of death data,
  # and rows are county-days
  pivot_wider(
    id_cols = c(fips, date),
    names_from = year, names_prefix = "prior_deaths_",
    values_from = deaths) %>%
  # For each county-date, calculate the mean deaths over the past years
  rowwise() %>%
  mutate(
    # For five years...
    prior_deaths_5yr = sum(prior_deaths_2015, prior_deaths_2016,
                            prior_deaths_2017, prior_deaths_2018,
                            prior_deaths_2019, na.rm = TRUE) / 5,
    # And for three years
    prior_deaths_3yr = sum(prior_deaths_2017, prior_deaths_2018,
                            prior_deaths_2019, na.rm = TRUE) / 3) %>%
  ungroup()
```


I have cumulative deaths during 2020. If possible, I'd like to subtract them

```{r}
now %>%
  group_by(date) %>%
  select(fips) %>%
  distinct() %>%
  count()

now %>% 
  arrange(date, fips) %>%
  head(100)
```

For 287 counties, which had at least 10 COVID deaths, we can report how many excess deaths occurred between Feb 1 and April 24, Feb 1 and May 16, Feb 1 and May 23, Feb 1 and August 22, and Feb 1 and August 29. This is pretty useful!

```{r}
# We have counts halfway through the week...
# I want to create a day-by-day estimate
expand_grid(fips = unique(then$fips),
            date = seq(from = then$date %>% sort() %>% head(1),
                       to = then$date %>% sort() %>% tail(1),
                       by = "days")) %>%
  # Join in the total number of deaths per month to every daily observation
  left_join(by = c("fips", "date" = "date"),
            y = then) %>%
  # Get an estimate of average deaths per day for a given month,
  # for each period
  mutate_at(vars(prior_deaths_2015, prior_deaths_2016,
                 prior_deaths_2017, prior_deaths_2018, prior_deaths_2019,
                 prior_deaths_5yr, prior_deaths_3yr),
                 funs(. / lubridate::days_in_month(date))) %>%
              arrange(fips, date) %>%
  # Now fill in each day with the same average amount,
  # and take their cumulative sum, creating an estimate for how many people 
  # died in the month by that given date, amoratized per day
  mutate(month = cut(date, breaks = "month")) %>%
  group_by(fips) %>%
  fill(contains("prior_deaths"), .direction = "down") %>%
  ungroup() %>%
  select(-month) %>%
  # Now, starting on February 1, how many people died BY THAT DAY?
  filter(date >= "2018-02-01") %>%
  # Get the cumulative deaths for each field
  group_by(fips) %>%
  mutate_at(vars(contains("prior_deaths")), funs(cumsum(.))) %>%
  ungroup() %>%
  # Now, join in the observations we have from 2020
  left_join(by = c("fips", "date"),
            y = now %>% select(fips, date, 
                               deaths_today = deaths) %>%
              # Edit the date so it matches the 2020 records
              mutate(date = lubridate::make_date(year = 2020,
                     month = lubridate::month(date),
                     day = lubridate::day(date)))) %>%
  # And filter to just dates we have data on from 2020 vs. before
  filter(!is.na(deaths_today), !is.na(prior_deaths_2019), 
                 !is.na(prior_deaths_3yr), !is.na(prior_deaths_5yr)) %>%
  # Join in population
  left_join(y = read_csv("raw_data/chr_2020.csv") %>% 
              dplyr::select(fips = `5-digit FIPS Code`,
                                    pop = `Population raw value`),
            by = "fips") %>%
  mutate(pop = as.numeric(pop)) %>%
  # Calculate excess deaths
  mutate(excess_deaths_1yr = deaths_today - prior_deaths_2019,
         excess_deaths_3yr = deaths_today - prior_deaths_3yr,
         excess_deaths_5yr = deaths_today - prior_deaths_5yr) %>%
    # save
  write_csv("excess_deaths/excess_deaths.csv")
```


We estimated the excess deaths in the following manner.
First, the CDC provides only cumulative updates of the number of total deaths per county in 2020. So, their data looks like this: (row 1) NYC March 1 - 2 deaths. (row 2) NYC May 26 - 5 deaths (+3 from previous).

What would really be BETTER for us is to know the number of deaths in that period. How many deaths occurred BETWEEN observations?  

Our analysis ideally should look like the following:

Here are our 6 datasets:
2020-04-25	(287 counties)
2020-05-16	(461 counties)
2020-05-23	(524 counties)
2020-08-22	(923 counties)
2020-08-29	(949 counties)
2020-09-26	(1071 counties)

We know how many people died between Feb 1 and April 25.
We know how many people died between Feb 1 and May 16.
We know how many people died between Feb 1 and May 23.
We know how many people died between Feb 1 and August 22.
We know how many people died between Feb 1 and August 29.
We know how many people died between Feb 1 and September 26.

The dataset above calculated the estimated cumulative deaths in 2018 and the estimated cumulative deaths in 2020.

Deaths between Feb 1 and April 25.
-I can get this for 287 counties.
Deaths between April 25 - May 16
-I can get this for 287 counties.
Deaths between May 16 - May 23
-I can get this for 461 counties
-Can't for the 63 counties that didn't report COVID deaths by April 16.
-While they might still have had undetected COVID deaths, CDC does not report any deaths unless COVID deaths were detected, so those 63 counties are lost.
Deaths between April 23 - August 22
-I can get this for 524 counties
-Same issue
Deaths between August 22 - August 29
- I can get this for 923 counties
Deaths between August 29 - September 26
- I can get this for 949 counties

```{r, message = FALSE, warning = FALSE}
read_csv("excess_deaths/excess_deaths.csv") %>% 
  # For each county,
  group_by(fips) %>%
  arrange(fips, date) %>%
  # Calculate the deaths during each period in the present
  mutate(deaths_today_lag = lag(deaths_today, 1)) %>%
  # Fix so that it only becomes cumulative after the first period
  # (This means we catch the first set of cases to occur, rather than get 0)
  mutate(deaths_today_lag = if_else(date == "2020-04-25", 0, deaths_today_lag)) %>%
  mutate(deaths_today_period = deaths_today - deaths_today_lag) %>%
  # Calculate the deaths during each period before
  mutate(prior_deaths_5yr_lag = lag(prior_deaths_5yr, 1),
            prior_deaths_3yr_lag = lag(prior_deaths_3yr, 1),
            prior_deaths_2019_lag = lag(prior_deaths_2019, 1)) %>%
  # Fix so that it only becomes cumulative after the first period
  mutate_at(vars(prior_deaths_5yr_lag,
                 prior_deaths_3yr_lag,
                 prior_deaths_2019_lag),
            funs(if_else(date == "2020-04-25", 0, .))) %>%
  # 
  mutate(deaths_5yr_period = prior_deaths_5yr - prior_deaths_5yr_lag,
         deaths_3yr_period = prior_deaths_3yr - prior_deaths_3yr_lag,
         deaths_1yr_period = prior_deaths_2019 - prior_deaths_2019_lag) %>%
  # Now calculate excess deaths during each period
  mutate(excess_deaths_1yr_period = deaths_today_period - deaths_1yr_period,
         excess_deaths_3yr_period = deaths_today_period - deaths_3yr_period,
         excess_deaths_5yr_period = deaths_today_period - deaths_5yr_period) %>%
  select(fips, date, pop, 
         excess_deaths_1yr_period, excess_deaths_3yr_period,
         excess_deaths_5yr_period) %>%
  filter(
    !is.na(excess_deaths_1yr_period),
    !is.na(excess_deaths_3yr_period),
    !is.na(excess_deaths_5yr_period)) %>%
  # Calculate the number of excess deaths during this period,
  # per 100,000 residents
  mutate(excess_deaths_1yr_period_rate = excess_deaths_1yr_period / pop * 100000,
         excess_deaths_3yr_period_rate = excess_deaths_3yr_period / pop * 100000,
         excess_deaths_5yr_period_rate = excess_deaths_5yr_period / pop * 100000) %>%
  ungroup() %>%
  write_csv("excess_deaths/excess_deaths_period.csv")
```



## 1.2 Social Capital Indices

### Kyne & Aldrich's 2019 (last updated 2019)

```{r, message = FALSE, warning = FALSE}
# Using the "dataverse" package, let's download Kyne and Aldrich's dataset straight from source.
#install.packages("dataverse")

#Read their codebook here:
#https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/IUNNZM

library(dataverse)
Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")

# Download the Kyne & Aldrich indices
kyne_aldrich <- get_dataframe_by_name(
    filename  = "Kyne Aldrich Capturing Dataset 2020.tab",
    dataset   = "10.7910/DVN/IUNNZM"
  ) %>%
  # Grab just the variables we need
  select(fips = fips_n, socialcap, bonding, bridging, linking) %>%
  mutate(fips = fips %>% str_pad(width = 5, side = "left", pad = "0")) 
```

### Rupasingha et al. 2006 (last updated 2014)

Rupasingha, A., Goetz, S. J., & Freshwater, D. (2006, with updates). The production of social capital in US counties. Journal of Socio-Economics, 35, 83-101. doi:10.1016/j.socec.2005.11.001.

```{r, message = FALSE, warning = FALSE}
rupasignha <- readxl::read_excel("raw_data/rupasingha_et_al_social_capital_indices_2014.xlsx") %>%
  select(fips = FIPS, sci_psu = sk2014) %>%
  mutate(fips = str_pad(fips, width = 5, side = "left", pad = "0")) 
```


### USJEC Senate Committee Index

https://www.jec.senate.gov/public/index.cfm/republicans/2018/4/the-geography-of-social-capital-in-america#toc-015-backlink

```{r, message = FALSE, warning = FALSE}
usjec <- readxl::read_excel("raw_data/usjec_senate_social_capital_indices_2018.xlsx", sheet = "County Index", skip = 2) %>%
  magrittr::set_colnames(value = names(.) %>% tolower() %>% 
                           str_replace_all(" |[-]", "_")) %>%
  select(fips = fips_code, sci_usjec = county_level_index, 
         family_unity_usjec = family_unity, 
         community_health_usjec = community_health, 
         institutional_health_usjec = institutional_health, 
         collective_efficacy_usjec = collective_efficacy) %>%
  mutate(fips = str_pad(fips, width = 5, side = "left", pad = "0")) 
```

### Combine

```{r, message = FALSE, warning = FALSE}
kyne_aldrich %>%
  left_join(by = "fips", y = rupasignha) %>%
  left_join(by = "fips", y = usjec) %>%
  write_csv("raw_data/indices.csv")
```


## 1.3 Area data

```{r}
tigris::counties(year = "2018") %>%
  sf::st_as_sf() %>%
  as.data.frame() %>% ungroup() %>%
  select(-geometry) %>%
  select(fips = GEOID, area_land = ALAND) %>%
  # convert land area, which is measured in square meters, to square kilometers
  mutate(area_land = as.numeric(area_land) / 1000000) %>%
  write_csv("raw_data/area.csv")
```

## 1.4 Party Voteshare data

```{r mitdata}
# ImportCounty Presidential Election Results 2000-2016 Data
# from MIT Elections Data & Science Lab
# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/VOQCHQ

read_csv("raw_data/MIT_countypres_2000_2016.csv") %>%
  filter(office == "President") %>%
  filter(party %in% c("democrat", "republican")) %>% 
  rename(fips = FIPS) %>%
  # Convert unique county id FIPS code from numeric to a five digit character code
  mutate(fips = str_pad(fips, width = 5, side = "left", pad = "0")) %>%
  
  # Aggregate voteshare by county, party, and year
  group_by(fips, party, year) %>%
    # Create the share of votes that went to each party per county-year election
  summarize(voteshare = sum(candidatevotes, na.rm = TRUE) / sum(totalvotes, na.rm = TRUE)) %>%
    # Pivot wider
  pivot_wider(
    id_cols = c(fips),
    names_from = c(party, year),
    names_sep = "_",
    values_from = voteshare) %>%
  write_csv("raw_data/county_elections.csv")
```


## 1.5 Google Mobility data

```{r}
# Specify import settings, to make sure read_csv() doesn't affect the file
settings = list(country_region_code = col_character(), 
                country_region = col_character(), 
                sub_region_1 = col_character(), 
                sub_region_2 = col_character(), 
                metro_area = col_character(), 
                iso_3166_2_code = col_character(),
                census_fips_code = col_character(),
                date = col_datetime(),
                col_double(), col_double(), 
                col_double(), col_double(), 
                col_double())

# Download US results
read_csv("https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv",
         col_types = settings) %>% 
  # Fiter to just actual county entries
  filter(country_region_code == "US",  !is.na(sub_region_2)) %>%
  # Remove unnecessary fields
  select(state = sub_region_1, county = sub_region_2, 
         fips = census_fips_code, date, 
         retail_recreation = retail_and_recreation_percent_change_from_baseline,
         grocery_pharmacy = grocery_and_pharmacy_percent_change_from_baseline,
         parks = parks_percent_change_from_baseline,
         transit = transit_stations_percent_change_from_baseline,
         residential = residential_percent_change_from_baseline,
         workplaces = workplaces_percent_change_from_baseline) %>%
  # get rid of the words "County" or "Parish" for easy joining with other datasets
  # mutate(county = county %>% str_remove(" County| Parish")) %>%
  # Turn date into a "Date" format for easy joining with datasets
  mutate(date = as.Date(date)) %>%
  # export to file
  saveRDS("raw_data/google_us.rds")

```

```{r}
daterange <- c(as.Date("2020-02-01"), 
  read_csv("excess_deaths/excess_deaths_period.csv")$date) %>%
  unique() %>%
  sort() %>%
  as.data.frame() %>%
  rename(start = 1) %>%
  mutate(end = lead(start, 1)) %>%
  filter(!is.na(end)) %>%
  mutate(id = 1:n()) 

get_mobility = function(data){
  # Print the number of rows we have processed
  print(data$id[1])
  
read_rds("google_us.rds") %>%
  select(-state, -county) %>%
  # Arrange like this: NYC 2020-02-01, NYC 2020-02-02, etc.
  arrange(fips, date) %>%
  # For each county,
  group_by(fips) %>%
  # Lag the mobility outcomes by 5 days 
  # (since it takes a median of 5 days for symptoms to occur)
  mutate_at(vars(retail_recreation:workplaces),
            funs(lag(., 5))) %>%
  # Filter mobility records to just those on or 
  # after the start date of the period, 
  # and before the end date of the period.
  filter(date >= data$start,
         date < data$end) %>%
  # Pivot into fewer columns for tidy calculation
  pivot_longer(
    cols = -c(fips, date),
    names_to = "measure",
    values_to = "value", 
    values_drop_na = TRUE) %>%
  # For each county and type of mobility,
  group_by(fips, measure) %>%
  # Calculate the mean percent change in mobility during the period
  summarize(
    value = mean(value, na.rm = TRUE)) %>%
  # Droppining NaN values were no valid observations could be averaged
  filter(!is.nan(value)) %>%
  ungroup() %>%
  # and pivot back our into a wide matrix for easy reading,
  pivot_wider(
    id_cols = c(fips),
    names_from = measure,
    values_from = value) %>%
  # adding the start and end date of the period to the record
  mutate(start = data$start,
         end = data$end) %>%
  return()
}

daterange %>%
  split(.$id) %>%
  map_dfr(~get_mobility(.)) %>%
  saveRDS("excess_deaths/average_google_mobility.rds")

```





## 1.6 County Health Rankings

```{r, message = FALSE, warning = FALSE}
# Use this website
# https://www.countyhealthrankings.org/explore-health-rankings/rankings-data-documentation

read_csv("raw_data/chr_2020.csv") %>%
  # remove anything other than the raw measures
  dplyr::select(!matches("CI low|CI high|numerator|denominator")) %>%
  # remove extra variable names
  slice(-1) %>%
  # Remove state or country level summaries
  filter(`County FIPS Code` != "000") %>%
  # Make all variables numeric, except identifiers
  mutate_at(vars(-c(1:7)), (funs(. %>% as.numeric))) %>%
  # select variables
  dplyr::select(state = `State Abbreviation`, 
                fips = `5-digit FIPS Code`, county = Name,
         # Documentation available here:
         # https://www.countyhealthrankings.org/explore-health-rankings/measures-data-sources/2020-measures
         
         # Years of potential life lost before age 75 per 100,000 population (age-adjusted). (2016-2018)
         premature_death = `Premature death raw value`,  
         premature_death_black = `Premature death (Black)`,
         premature_death_hispanic = `Premature death (Hispanic)`,
         premature_death_white = `Premature death (White)`,
         # Percentage of adults reporting fair or poor health (age-adjusted) 2017
         poor_fair_health = `Poor or fair health raw value`, 
         # Average number of physically unhealthy days reported in past 30 days (age-adjusted). 2017
         poor_phys_health_days = `Poor physical health days raw value`,
         # Average number of mentally unhealthy days reported in past 30 days (age-adjusted) 2017
         poor_ment_health_days = `Poor mental health days raw value`,
         # Percentage of adults who are current smokers. 2017
         smoking = `Adult smoking raw value`, 
         # Percentage of the adult population (age 20 and older) 
         # that reports a body mass index (BMI) greater than or equal to 30 kg/m2. (2016)
         obesity = `Adult obesity raw value`,
         # Percentage of adults aged 20 and above with diagnosed diabetes. (2016)
         diabetes = `Diabetes prevalence raw value`, 
         # Index of factors that contribute to a healthy food environment,
         # from 0 (worst) to 10 (best). (2015-2017)
         food_env_index = `Food environment index raw value`,
         # Percentage of adults age 20 and over reporting no leisure-time physical activity. (2016)
         physical_inactivity = `Physical inactivity raw value`,
         # Percentage of population with adequate access to locations for physical activity. (2010, 2019)
         exercise_access = `Access to exercise opportunities raw value`,
         # Percentage of adults reporting binge or heavy drinking (2017)
         drinking_excessive = `Excessive drinking raw value`, 
         # Percentage of driving deaths with alcohol involvement. (2014-2018)
         alcohol_driving_deaths = `Alcohol-impaired driving deaths raw value`,
         # Number of newly diagnosed chlamydia cases per 100,000 population. (2017) 
         sexually_transmitted_infections = `Sexually transmitted infections raw value`,
         # Percentage of population under age 65 without health insurance. (2017)
         uninsured = `Uninsured raw value`,
         #  rate of number of primary care providers/100,000 population (2017)
         primary_care_physicians = `Primary care physicians raw value`,
         #  rate of number of dentistry providers/100,000 population (2018)
         dentists = `Dentists raw value`,
         #  rate of number of mental health providers/100,000 population (2019)
         mental_health_providers = `Mental health providers raw value`,
         # Rate of hospital stays for ambulatory-care sensitive conditions per 100,000 Medicare enrollees. (2017)
         prevent_hospital_stays = `Preventable hospital stays raw value`,
         # Rate of hospital stays for ambulatory-care sensitive conditions per 100,000 Medicare enrollees. (2017)
         prevent_hospital_stays_white = `Preventable hospital stays (White)`,
         # Rate of hospital stays for ambulatory-care sensitive conditions per 100,000 Medicare enrollees. (2017)
         prevent_hospital_stays_black = `Preventable hospital stays (Black)`,
        # `Percentage of fee-for-service (FFS) Medicare enrollees that had an annual flu vaccination.`
        flu_vaccinations = `Flu vaccinations raw value`,
        # Percentage of adults ages 25-44 with some post-secondary education. (2014-2018)
        some_college = `Some college raw value`,
        # Percentage of population ages 16 and older unemployed but seeking work. (2018)
        unemployment = `Unemployment raw value`,
        # Ratio of household income at the 80th percentile to income at the 20th percentile (2014-2018)
        income_inequality = `Income inequality raw value`,
        # Number of membership associations per 10,000 population. (2017)
        social_associations = `Social associations raw value`,
        # Number of reported violent crime offenses per 100,000 population. (2014-2016)
        violent_crime = `Violent crime raw value`,
        # `Number of deaths due to injury per 100,000 population.` (2014-2018)
        injury_deaths = `Injury deaths raw value`,
        # Average daily density of fine particulate matter in micrograms per cubic meter (PM2.5). (2014)
        air_pollution = `Air pollution - particulate matter raw value`,
        # Average number of years a person can expect to live. (2016-2018)
        life_expectancy = `Life expectancy raw value`,
        life_expectancy_black = `Life expectancy (Black)`,
        life_expectancy_white = `Life expectancy (White)`,
        life_expectancy_hisp = `Life expectancy (Hispanic)`,
        life_expectancy_asian = `Life expectancy (Asian/Pacific Islander)`,
        # Number of deaths among residents under age 75 per 100,000 population (age-adjusted). (2016-2018)
        premature_age_adjusted_mortality = `Premature age-adjusted mortality raw value`,
        
        # Percentage of adults reporting 14 or more days of poor physical health per month. (2017)
        frequent_phys_distress = `Frequent physical distress raw value`,
        # Percentage of adults reporting 14 or more days of poor mental health per month. (2017)
        frequent_ment_distress = `Frequent mental distress raw value`,
        # Number of people aged 13 years and older living with a diagnosis of
        # human immunodeficiency virus (HIV) infection per 100,000 population. (2016)
        hiv = `HIV prevalence raw value`,
        # `Number of drug poisoning deaths per 100,000 population. (2016-2018)
        drug_overdose_deaths = `Drug overdose deaths raw value`,
        # The income where half of households in a county earn more and half of households earn less. (2018)
        median_household_income = `Median household income raw value`,
        # Index of dissimilarity where higher values indicate greater residential
        # segregation between Black and White county residents. (2014-2018)
        residential_segregation_black_white = `Residential segregation - Black/White raw value`,
        # Index of dissimilarity where higher values indicate greater residential 
        # segregation between non-White and White county residents. (2014-2018)
        residential_segregation_nonwhite_white = `Residential segregation - non-White/White raw value`,
        # Percentage of occupied housing units that are owned. (2014-2018)
        homeownership = `Homeownership raw value`,
        # The following demographic variables all come from the American Community Survey 2014-2018
        pop = `Population raw value`,
        pop_age_under_18 = `% below 18 years of age raw value`,
        pop_age_65_plus = `% 65 and older raw value`,
        pop_black = `% Non-Hispanic Black raw value`,
        pop_nativeam = `% American Indian & Alaska Native raw value`,
        pop_asian = `% Asian raw value`,
        pop_pacific = `% Native Hawaiian/Other Pacific Islander raw value`,
        pop_hisp = `% Hispanic raw value`,
        pop_white = `% Non-Hispanic White raw value`, 
        pop_non_english_speaker = `% not proficient in English raw value`, # 2014-2018
        pop_female = `% Females raw value`,
        pop_rural = `% Rural raw value`) %>%
  # Now merge in social capital indices
  left_join(y = read_csv("raw_data/indices.csv"), by = "fips") %>%
  # Merge in area
  left_join(y = read_csv("raw_data/area.csv"), by = "fips") %>%
  # export
  write_csv("raw_data/chr.csv")

# Identify variables missing less than 5% of values
available <- read_csv("raw_data/chr.csv") %>% 
  pivot_longer(
    cols = -c(state, fips, county),
    names_to = "measure",
    values_to = "value") %>%
  group_by(measure) %>%
  summarize(available = sum(if_else(!is.na(value), 1, 0), na.rm = TRUE) / n()) %>%
  filter(available >= 0.95) %>%
  select(measure) %>% unlist() %>% unname()

# Now keep just those variables
read_csv("raw_data/chr.csv") %>%
  select(state, fips, county, available) %>%
  write_csv("raw_data/chr.csv")
```

## 1.7 Government Data

```{r}
library(tidyverse)
library(tidycensus)
library(censusapi)

# To download this data yourself, uncomment the code, register for a census api key, and go for it! Otherwise, just use the version we downloaded.

# myapikey <- "INSERTYOURCENSUSAPIKEYHERE"
#census_api_key(myapikey)

#vars <- load_variables(year = 2018, dataset = "acs5", cache = TRUE) %>%
#  mutate_at(vars(label, concept), funs(tolower(.)))

# Download number of municipal employees and population
#get_acs(geography = "county", 
#               survey = "acs5", 
#               year = 2018, 
#               variables = list(employee_muni = "B08128_006",
#                                pop = "B01001_001")) %>%
#  select(fips = GEOID, variable,estimate) %>%
#  pivot_wider(id_cols = fips, names_from = variable, values_from = estimate) %>%
  # Calculate number of municipal employees per 1000 residents
#  mutate(employee_muni = employee_muni / pop * 1000) %>%
#  select(-pop) %>%
#  saveRDS("raw_data/employee_muni_2018.rds")


```




## 1.8 Join dataset

```{r, message=FALSE, warning = FALSE}
# For each period, where date means the END date of the period,
# get the excess deaths
dat <- read_csv("excess_deaths/excess_deaths_period.csv") %>%
  select(-pop) %>%
  # Join in mobility, based on the end date of the period
  left_join(y = read_rds("excess_deaths/average_google_mobility.rds"),
            by = c("fips", "date" = "end")) %>%
  # Join in county election outcomes
  left_join(y = read_csv("raw_data/county_elections.csv"), by = "fips") %>%
  # Join in CHR data
  left_join(y = read_csv("raw_data/chr.csv"), by = "fips") %>%
  # Join in municipal employees
  left_join(by = "fips", y = read_rds("raw_data/employee_muni_2018.rds"))
  
# Reorder columns for easy viewing
bind_cols(
  dat %>%
    select(state, county, fips, start, date),
  dat %>%
    select(-state, -county, -fips, -start,  -date)) %>%
  saveRDS("excess_deaths/excess_deaths_dataset.rds")

remove(dat, daterange, settings,
       now, then, range, get_mobility, import)

```

## 1.9 Format Data

```{r}
### Format Dataset

# Format data
read_rds("excess_deaths/excess_deaths_dataset.rds") %>%
  # Since google mobility data is only available for 400 of these counties, 
  # we're just going to zoom into these cases,
  # and then use matching to get similar counties.
  select(-retail_recreation, -residential, -grocery_pharmacy, -transit, -parks) %>%
  # Make an extra variable
  mutate(pop_density = pop / area_land) %>%
  as.data.frame() %>%
  # Impute mean for missing data
  group_by(date) %>%
  mutate_at(vars(workplaces:pop_density), 
            funs(if_else(condition = is.na(.), 
                         true = mean(., na.rm = TRUE),
                         false = .))) %>%
  ungroup() %>%
  
  mutate(date = factor(date)) %>%
  # Fix any incorrect start dates
  mutate(start = date %>% dplyr::recode_factor(
    "2020-04-25" = "2020-02-01",
    "2020-05-16" = "2020-04-25",
    "2020-05-23" = "2020-05-16",
    "2020-08-22" = "2020-05-23",
    "2020-08-29" = "2020-08-22",
    "2020-09-26" = "2020-08-29")) %>%
  # Calculate the length of each period
  mutate(period = lubridate::interval(start = as.Date(start), end = as.Date(date)) %>% 
           lubridate::time_length(unit = "day")) %>%
  # Rescale all health variables into Z-scores
  mutate_at(vars(smoking, drinking_excessive, physical_inactivity,
                 diabetes, obesity, frequent_phys_distress, premature_age_adjusted_mortality,
                 prevent_hospital_stays, primary_care_physicians), funs(scale(.) %>% as.numeric)) %>%
  # Build a Health Quality score
  rowwise() %>%
  mutate(health_quality = sum( 
    smoking + drinking_excessive +physical_inactivity +
      diabetes + obesity + frequent_phys_distress +
      premature_age_adjusted_mortality) / 7) %>%
  # Build a Health Care Capacity score
  # remember to reverse the polarity of preventable_hospital_stays
  mutate(health_care_capacity = sum(-1*prevent_hospital_stays + primary_care_physicians) / 2) %>%
  ungroup() %>%
  # create a binary outcome for high or low social capital
  mutate(
    # Kyne and Aldrich indices
    socialcap_cat = ntile(socialcap, 2) - 1,
    bonding_cat = ntile(bonding, 2) - 1,
    bridging_cat = ntile(bridging, 2) - 1,
    linking_cat = ntile(linking, 2) - 1,
    all_cat = ntile(bonding*bridging*linking, 2) - 1,
    # Penn State indices
    sci_psu_cat = ntile(sci_psu, 2) - 1,
    # USJEC indices
    sci_usjec_cat = ntile(sci_usjec, 2) - 1,
    family_unity_usjec_cat = ntile(family_unity_usjec, 2) - 1,
    community_health_usjec_cat = ntile(community_health_usjec, 2) - 1,
    collective_efficacy_usjec_cat = ntile(collective_efficacy_usjec, 2) - 1,
    institutional_health_usjec_cat = ntile(institutional_health_usjec, 2) - 1) %>%
  # Save
  saveRDS("dataset.rds")



read_rds("excess_deaths/excess_deaths_dataset.rds") %>%
  # Since google mobility data is only available for 400 of these counties, 
  # we're just going to zoom into these cases,
  # and then use matching to get similar counties.
  select(-retail_recreation, -residential, -grocery_pharmacy, -transit, -parks) %>%
  # Make an extra variable
  mutate(pop_density = pop / area_land) %>%
  as.data.frame() %>%
  # Impute mean for missing data

  
  mutate(date = factor(date)) %>%
  # Fix any incorrect start dates
  mutate(start = date %>% dplyr::recode_factor(
    "2020-04-25" = "2020-02-01",
    "2020-05-16" = "2020-04-25",
    "2020-05-23" = "2020-05-16",
    "2020-08-22" = "2020-05-23",
    "2020-08-29" = "2020-08-22",
    "2020-09-26" = "2020-08-29")) %>%
  # Calculate the length of each period
  mutate(period = lubridate::interval(start = as.Date(start), end = as.Date(date)) %>% 
           lubridate::time_length(unit = "day")) %>%
  # Rescale all health variables into Z-scores
  mutate_at(vars(smoking, drinking_excessive, physical_inactivity,
                 diabetes, obesity, frequent_phys_distress, premature_age_adjusted_mortality,
                 prevent_hospital_stays, primary_care_physicians), 
            funs(scale(.) %>% as.numeric)) %>%
  # Build a Health Quality score
  rowwise() %>%
  # Count how many valid observations there are
  mutate(valid_obs = sum(!is.na(smoking), !is.na(drinking_excessive),
                         !is.na(physical_inactivity), !is.na(diabetes),
                         !is.na(obesity), !is.na(frequent_phys_distress),
                         !is.na(premature_age_adjusted_mortality))) %>%
  mutate(health_quality = sum(smoking, drinking_excessive, physical_inactivity,
      diabetes, obesity, frequent_phys_distress,
      premature_age_adjusted_mortality, na.rm = TRUE) / valid_obs) %>%
  # Build a Health Care Capacity score
  # remember to reverse the polarity of preventable_hospital_stays
  mutate(valid_obs = sum(!is.na(prevent_hospital_stays), !is.na(primary_care_physicians))) %>%
 mutate(health_care_capacity = sum(-1*prevent_hospital_stays, primary_care_physicians, na.rm = TRUE) / valid_obs) %>%
  ungroup() %>%
  select(-valid_obs) %>%
  # create a binary outcome for high or low social capital
  mutate(
    # Kyne and Aldrich indices
    socialcap_cat = ntile(socialcap, 2) - 1,
    bonding_cat = ntile(bonding, 2) - 1,
    bridging_cat = ntile(bridging, 2) - 1,
    linking_cat = ntile(linking, 2) - 1,
    all_cat = ntile(bonding*bridging*linking, 2) - 1,
    # Penn State indices
    sci_psu_cat = ntile(sci_psu, 2) - 1,
    # USJEC indices
    sci_usjec_cat = ntile(sci_usjec, 2) - 1,
    family_unity_usjec_cat = ntile(family_unity_usjec, 2) - 1,
    community_health_usjec_cat = ntile(community_health_usjec, 2) - 1,
    collective_efficacy_usjec_cat = ntile(collective_efficacy_usjec, 2) - 1,
    institutional_health_usjec_cat = ntile(institutional_health_usjec, 2) - 1) %>%
  # Save
  saveRDS("dataset_unimputed.rds")
```


Finally, let's also just get a dataset representing the entire population of US counties for comparison.

```{r}
dat <- expand_grid(
  fips = read_csv("raw_data/chr.csv")$fips %>% unique(),
  date = read_csv("excess_deaths/excess_deaths_period.csv")$date %>% unique()) %>%
  # Join in County Health Rankins and indices
  left_join(by = "fips", y = read_csv("raw_data/chr.csv")) %>%
  # Join in mobility, based on the end date of the period
  left_join(y = read_rds("excess_deaths/average_google_mobility.rds"),
            by = c("fips", "date" = "end")) %>%
  # Join in county election outcomes
  left_join(y = read_csv("raw_data/county_elections.csv"), by = "fips") %>%
  # Join in municipal employees
  left_join(by = "fips", y = read_rds("raw_data/employee_muni_2018.rds"))
  
# Reorder columns for easy viewing
dat <- bind_cols(
  dat %>%
    select(state, county, fips, start, date),
  dat %>%
    select(-state, -county, -fips, -start,  -date)) 

dat %>%
  # Since google mobility data is only available for 400 of these counties, 
  # we're just going to zoom into these cases,
  # and then use matching to get similar counties.
  select(-retail_recreation, -residential, -grocery_pharmacy, -transit, -parks) %>%
  # Make an extra variable
  mutate(pop_density = pop / area_land) %>%
  as.data.frame() %>%
  # Impute mean for missing data
  mutate(date = factor(date)) %>%
  # Fix any incorrect start dates
  mutate(start = date %>% dplyr::recode_factor(
    "2020-04-25" = "2020-02-01",
    "2020-05-16" = "2020-04-25",
    "2020-05-23" = "2020-05-16",
    "2020-08-22" = "2020-05-23",
    "2020-08-29" = "2020-08-22",
    "2020-09-26" = "2020-08-29")) %>%
  # Calculate the length of each period
  mutate(period = lubridate::interval(start = as.Date(start), end = as.Date(date)) %>% 
           lubridate::time_length(unit = "day")) %>%
  # Rescale all health variables into Z-scores
  mutate_at(vars(smoking, drinking_excessive, physical_inactivity,
                 diabetes, obesity, frequent_phys_distress, premature_age_adjusted_mortality,
                 prevent_hospital_stays, primary_care_physicians), 
            funs(scale(.) %>% as.numeric)) %>%
  # Build a Health Quality score
  rowwise() %>%
  # Count how many valid observations there are
  mutate(valid_obs = sum(!is.na(smoking), !is.na(drinking_excessive),
                         !is.na(physical_inactivity), !is.na(diabetes),
                         !is.na(obesity), !is.na(frequent_phys_distress),
                         !is.na(premature_age_adjusted_mortality))) %>%
  mutate(health_quality = sum(smoking, drinking_excessive, physical_inactivity,
      diabetes, obesity, frequent_phys_distress,
      premature_age_adjusted_mortality, na.rm = TRUE) / valid_obs) %>%
  # Build a Health Care Capacity score
  # remember to reverse the polarity of preventable_hospital_stays
  mutate(valid_obs = sum(!is.na(prevent_hospital_stays), !is.na(primary_care_physicians))) %>%
 mutate(health_care_capacity = sum(-1*prevent_hospital_stays, primary_care_physicians, na.rm = TRUE) / valid_obs) %>%
  ungroup() %>%
  select(-valid_obs) %>%
  # create a binary outcome for high or low social capital
  mutate(
    # Kyne and Aldrich indices
    socialcap_cat = ntile(socialcap, 2) - 1,
    bonding_cat = ntile(bonding, 2) - 1,
    bridging_cat = ntile(bridging, 2) - 1,
    linking_cat = ntile(linking, 2) - 1,
    all_cat = ntile(bonding*bridging*linking, 2) - 1,
    # Penn State indices
    sci_psu_cat = ntile(sci_psu, 2) - 1,
    # USJEC indices
    sci_usjec_cat = ntile(sci_usjec, 2) - 1,
    family_unity_usjec_cat = ntile(family_unity_usjec, 2) - 1,
    community_health_usjec_cat = ntile(community_health_usjec, 2) - 1,
    collective_efficacy_usjec_cat = ntile(collective_efficacy_usjec, 2) - 1,
    institutional_health_usjec_cat = ntile(institutional_health_usjec, 2) - 1) %>%
  # Save
  saveRDS("dataset_pop_unimputed.rds")
remove(dat, daterange, settings,
       now, then, range, get_mobility, import)
```

# 2. Analysis

## 2.1 Import Data

```{r}
# Import data!
dat <- read_rds("dataset.rds") %>%
  # Format all numeric variables
  # Rescale each from 1 to 10
  mutate_at(vars(workplaces:pop_density),
            funs(scales::rescale(., to = c(1,10)))) 
```

## 2.2 Descriptives

### Figure 1: Histograms

```{r}
# Let's briefly examine the distributions of these periods
dat %>% 
  # Format date range for panels
  mutate(start = paste(str_sub(start, 6,7), "/", str_sub(start, 9, 10), sep = "")) %>%
  mutate(end = paste(str_sub(date, 6,7), "/", str_sub(date, 9, 10), sep = "")) %>%
  mutate(date = paste(start, " - ", end, sep = "")) %>% 
  # Pivot into a tidy data.frame
  pivot_longer(cols = contains("period_rate"),
               names_to = "type", values_to = "outcome") %>%
  # rename
  mutate(type = type %>% recode_factor(
    "excess_deaths_1yr_period_rate" = "1-year range\n(2019)",
    "excess_deaths_3yr_period_rate" = "3-year average\n(2017-2019)",
    "excess_deaths_5yr_period_rate" = "5-year average\n(2015-2019)")) %>%
  ggplot(mapping = aes(x = outcome, 
                       fill = type)) +
  geom_histogram(color = "white", size = 0.25, alpha = 0.25, 
                 position = "identity") +
  facet_wrap(~date, scales = "free") +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(color = "black", fill = NA),
        legend.position = "bottom") +
  scale_fill_manual(values = c("#648FFF",  "#DC267F", "#FFB000")) +
  labs(y = "Frequence (# of Counties)",
       x = "Excess Death Rate per 100,000 persons",
       fill = "Year of Comparison") +
  ggsave("viz/figure_1.png", dpi = 500, height = 5, width = 8)
```


# 3. Basic Models

## 3.1 Individual Models

```{r}
# Import data!
dat <- read_rds("dataset.rds") %>%
  # Format all numeric variables
  # Rescale each from 1 to 10
  mutate_at(vars(workplaces:pop_density),
            funs(scales::rescale(., to = c(1,10)))) 

# Get models of single time slices 
m1 <- dat %>%
  split(.$date) %>%
  map(~lm(formula = excess_deaths_5yr_period_rate ~ 
       socialcap +
         democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni, data = .))
# Kyne & Aldrich, broken into three subindices
m2 <- dat %>%
  split(.$date) %>%
  map(~lm(formula = excess_deaths_5yr_period_rate ~ 
       bonding + bridging + linking + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni, data = .))
# Penn
m3 <- dat %>%
  split(.$date) %>%
  map(~lm(formula = excess_deaths_5yr_period_rate ~ 
       sci_psu + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni, data = .))
# USJEC
m4 <- dat %>%
  split(.$date) %>%
  map(~lm(formula = excess_deaths_5yr_period_rate ~ 
       sci_usjec + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni, data = .))
# USJEC subindices
m5 <- dat %>%
  # In order to avoid colinearity by health quality ,race, and income,
  # must break this index into 10 pieces
  mutate(family_unity_usjec = ntile(family_unity_usjec, 10) %>% 
           scales::rescale(to = c(1,10))) %>%
  split(.$date) %>%
  map(~lm(formula = excess_deaths_5yr_period_rate ~ 
       family_unity_usjec + community_health_usjec + 
        institutional_health_usjec + collective_efficacy_usjec +
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni, data = .))

# Calculate the highest VIF among them; all are below 10, some around 7. Pretty okay.
# for example:
#m5 %>%
#  map(~car::vif(.) )

```

### Kyne & Aldrich

```{r}

texreg::htmlreg(
  c(m1,m2),
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  caption.above = TRUE,
  caption = "<b>OLS models of Excess Death Rates for US Counties by Time Period</b><br><i>Based on up to 941 counties observed between February 1 and September 26</i>",
  custom.header = list("Using Kyne & Aldrich Social Capital Indices<br>Overall Social Capital Index" = 1:6,
                       "Using Kyne & Aldrich Social Capital Indices<br>Subindices by Type of Social Capital" = 7:12),
  custom.model.names = rep(c("2/1 - 4/25", "4/26 - 5/16", "5/17 - 5/23", 
                         "5/24 - 8/22", "8/23 - 8/29", "8/30 - 9/26"), 2),
  file = "viz/table_1.html",
  custom.coef.map = list(
    # Social Capital Effects
    "socialcap" = "Social Capital",
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    # Mobility
    "workplaces" = "Average % Change in Mobility (Workplaces)",
    # Governance
        "democrat_2016" = "% Voted Democrat in 2016 Presidential Election",
    "employee_muni" = "Municipal Employees per 1000 residents",
    # Health Care Capacity
    "health_care_capacity" = "Health Care Capacity Index",
    "uninsured" = "% Uninsured",
    # Health Conditions
    "health_quality" = "Quality of Health Index",
    # Socioeconomics
        "median_household_income" = "Median Household Income",
    "some_college" = "% Some college or more",
    # Demographics
  "pop_age_65_plus" = "% Age 65 or over",
    "pop_female" = "% Women",
    "pop_black" = "% Black",
    "pop_density" = "Population Density",
    "(Intercept)" = "Constant"),
  groups = list("<b>Social Capital</b>" = 1:4,
                "<b>Mobility</b>" = 5,
                "<b>Politics & Governance</b>" = 6:7,
                "<b>Health Care</b>" = 8:10,
                "<b>Socioeconomics</b>" = 11:12,
                "<b>Demographics</b>" = 13:17),
  include.fstat = TRUE,
  custom.note = "*** signifies statistical significance at p < 0.001, ** at p < 0.01, * at p < 0.05, and . at p < 0.10. Effect sizes have been standardized from 1 (min) to 10 (max) and can be compared among continuous variables. All variance inflation factor (VIF) scores were below 10, the threshold for problematic colinearity, with a max of 7.5.")

# For USJEC subindex models, family unity was broken into 10 quantiles to avoid colinearity problems with income, health, and race covariates. 
```

### Alternatives

```{r}

texreg::htmlreg(
  c(m3,m4,m5),
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  caption.above = TRUE,
  caption = "<b>OLS models of Excess Death Rates for US Counties by Time Period</b><br><i>Based on up to 941 counties observed between February 1 and September 26</i>",
  custom.header = list("Using Rupasingha et al.'s Social Capital Index<br>Overall Social Capital Index" = 1:6,
                       "Using USJEC's Social Capital Index<br>Overall Social Capital Index" = 7:12,
                       "Using USJEC's Social Capital Index<br>Subindices by Type of Social Capital" = 13:18),
  custom.model.names = rep(c("2/1 - 4/25", "4/26 - 5/16", "5/17 - 5/23", 
                         "5/24 - 8/22", "8/23 - 8/29", "8/30 - 9/26"), 3),
  file = "viz/table_2.html",
  custom.coef.map = list(
    # Social Capital Effects
    "socialcap" = "Social Capital",
    "sci_psu" = "Social Capital",
    "sci_usjec" = "Social Capital",
    # types
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    # other types
    "family_unity_usjec" = "Family Unity Index",
    "community_health_usjec" = "Community Health Index",
    "collective_efficacy_usjec" = "Collective Efficacy Index",
    "institutional_health_usjec" = "Institutional Health Index",
    # Mobility
    "workplaces" = "Average % Change in Mobility (Workplaces)",
    # Governance
        "democrat_2016" = "% Voted Democrat in 2016 Presidential Election",
    "employee_muni" = "Municipal Employees per 1000 residents",
    # Health Care Capacity
    "health_care_capacity" = "Health Care Capacity Index",
    "uninsured" = "% Uninsured",
    # Health Conditions
    "health_quality" = "Quality of Health Index",
    # Socioeconomics
        "median_household_income" = "Median Household Income",
    "some_college" = "% Some college or more",
    # Demographics
  "pop_age_65_plus" = "% Age 65 or over",
    "pop_female" = "% Women",
    "pop_black" = "% Black",
    "pop_density" = "Population Density",
    "(Intercept)" = "Constant"),
  groups = list("<b>Social Capital</b>" = 1:5,
                "<b>Mobility</b>" = 6,
                "<b>Politics & Governance</b>" = 7:8,
                "<b>Health Care</b>" = 9:11,
                "<b>Socioeconomics</b>" = 12:13,
                "<b>Demographics</b>" = 14:18),
  include.fstat = TRUE,
  custom.note = "*** signifies statistical significance at p < 0.001, ** at p < 0.01, * at p < 0.05, and . at p < 0.10. Effect sizes have been standardized from 1 (min) to 10 (max) and can be compared among continuous variables. All variance inflation factor (VIF) scores were below 10, the threshold for problematic colinearity, with a max of 7.5. For USJEC subindex models, family unity was broken into 10 quantiles to avoid colinearity problems with income, health, and race covariates.")
```

## 3.2 Unbalanced Panel Models

### Working Correlation

```{r}

# Import data!
dat <- read_rds("dataset.rds") %>%
  # Adjust for colinearity
  mutate_at(vars(health_care_capacity,
               health_quality,
               some_college, family_unity_usjec),
          funs(ntile(., 8))) %>%
  # Format all numeric variables
  # Rescale each from 1 to 10
  mutate_at(vars(workplaces:pop_density),
            funs(scales::rescale(., to = c(1,10))))  %>%
  mutate(date = factor(date)) %>%
  arrange(fips, date) %>%
  as.data.frame()

# Let's identify the correct working correlation structure using the MESS package,
# we'll get an independent/unstructed one, an exchangeable one, and one with AR-1 autocorrelation,
# and we'll compare which produces the lowest quasi-likelihood estimate (QIC)

m1 <- dat %>%
  geeglm(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
         family = "gaussian", id = fips, 
         corstr = "exchangeable", std.err="san.se") 

m1c <- dat %>%
  geeglm(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
         family = "gaussian", id = fips, corstr = "ar1", std.err="san.se") 


m1d <- dat %>%
  geeglm(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
         family = "gaussian", id = fips, corstr = "independece", std.err="san.se") 


m1d <- dat %>%
  geeglm(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
         family = "gaussian", id = fips, 
         corstr = "unstructured", std.err="san.se") 


#http://finzi.psych.upenn.edu/R/library/MuMIn/html/QIC.html
list(m1,m1b,m1c,m1d) %>%
  map(~MESS::QIC(.) %>% round(0)) %>%
  bind_rows(.id = "model")

# Independence has the lowest - but that refers to no correlation structure,
# meaning it is just linear regression - 
# and we already showcase linear regression with fixed effects models. 
# The next smallest is exchangeable; 
# they are very, very close to independence. We'll go with exchangeable.
remove(m1b,m1c, m1d)
```

### Models

```{r}

# Import data!
dat <- read_rds("dataset.rds") %>%
  # Adjust for colinearity
  mutate_at(vars(health_care_capacity,
               health_quality,
               some_college, family_unity_usjec),
          funs(ntile(., 8))) %>%
  # Format all numeric variables
  # Rescale each from 1 to 10
  mutate_at(vars(workplaces:pop_density),
            funs(scales::rescale(., to = c(1,10))))  %>%
  mutate(date = factor(date)) %>%
  arrange(fips, date) %>%
  as.data.frame()

       
# Let's also calculate this as a fixed effects model
m1 <- dat %>%
  lm(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni + factor(date))

# Let's also generate a random effects model
m2 <- dat %>%
  lmer(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni +
         (1 | date)) 
# Now reestimate GEE with Zelig
m3 <- dat %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
        model = "normal.gee", corstr = "exchangeable", id = "fips")

texreg::htmlreg(
  list(m1,m2,m3),
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  caption.above = TRUE,
  caption = "<b>Panel OLS models of Excess Death Rates for US Counties by Time Period (n = 3416)</b><br><i>Based on an Unbalanced Panel Dataset of up to 941 counties observed<br>between February 1 and September 26 (time periods = 6)</i>",
  custom.header = list("Using Kyne & Aldrich's Social Capital Index<br>Overall Social Capital Index" = 1:3),
  custom.model.names = rep(c("Fixed<br>Effects", "Random<br>Effects", "GEE<br>(Exchangeable)"), 1),
  file = "viz/table_3.html",
  custom.coef.map = list(
    # Social Capital Effects
    "socialcap" = "Social Capital",
    "sci_psu" = "Social Capital",
    "sci_usjec" = "Social Capital",
    # types
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    # other types
    "family_unity_usjec" = "Family Unity Index",
    "community_health_usjec" = "Community Health Index",
    "collective_efficacy_usjec" = "Collective Efficacy Index",
    "institutional_health_usjec" = "Institutional Health Index",
    # Mobility
    "workplaces" = "Average % Change in Mobility (Workplaces)",
    # Governance
    "democrat_2016" = "% Voted Democrat in 2016 Presidential Election",
    "employee_muni" = "Municipal Employees per 1000 residents",
    # Health Care Capacity
    "health_care_capacity" = "Health Care Capacity Index",
    "uninsured" = "% Uninsured",
    # Health Conditions
    "health_quality" = "Quality of Health Index",
    # Socioeconomics
    "median_household_income" = "Median Household Income",
    "some_college" = "% Some college or more",
    # Demographics
    "pop_age_65_plus" = "% Age 65 or over",
    "pop_female" = "% Women",
    "pop_black" = "% Black",
    "pop_density" = "Population Density",
    "(Intercept)" = "Constant"),
  groups = list("<b>Social Capital</b>" = 1:3,
                "<b>Mobility</b>" = 4,
                "<b>Politics & Governance</b>" = 5:6,
                "<b>Health Care</b>" = 7:9,
                "<b>Socioeconomics</b>" = 10:11,
                "<b>Demographics</b>" = 12:16),
  custom.gof.rows = list(
    "R2 / Conditional R2" = list(m1,m2,m3) %>%
      map(~performance::r2(.)[1]) %>% unlist(),
    "Adj. R2 / Marginal R2" = list(m1,m2,m3) %>%
      map(~performance::r2(.)[2]) %>% unlist()),
  include.fstat = TRUE,
  include.aic = FALSE, include.bic = FALSE, include.loglik = FALSE,
  include.rsqu = FALSE, include.adjr = FALSE, include.groups = FALSE,
  custom.note = "*** signifies statistical significance at p < 0.001, ** at p < 0.01, * at p < 0.05, and . at p < 0.10. Effect sizes have been standardized from 1 (min) to 10 (max) and can be compared among continuous variables. All variance inflation factor (VIF) scores were below 10, the threshold for problematic colinearity, with a max of 7.7. Health quality, health care capacity, and income were broken into 8 quantiles to avoid colinearity problems.")

```
### Alternatives

```{r}

# Import data!
dat <- read_rds("dataset.rds") %>%
  # Adjust for colinearity
  mutate_at(vars(health_care_capacity,
               health_quality,
               some_college, family_unity_usjec),
          funs(ntile(., 8))) %>%
  # Format all numeric variables
  # Rescale each from 1 to 10
  mutate_at(vars(workplaces:pop_density),
            funs(scales::rescale(., to = c(1,10))))  %>%
  mutate(date = factor(date)) %>%
  arrange(fips, date) %>%
  as.data.frame()

# Get it for overall social capital for Kyne and Aldrich
# Let's also calculate this as a fixed effects model
m1a <- dat %>%
  lm(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          socialcap + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni + factor(date))

# Let's also generate a random effects model
m1b <- dat %>%
  lmer(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          socialcap + democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni +
         (1 | date)) 
# Now reestimate GEE with Zelig
m1c <- dat %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          socialcap + democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
        model = "normal.gee", corstr = "exchangeable", id = "fips")

    
# Repeat for each subtype   
# Let's also calculate this as a fixed effects model
m2a <- dat %>%
  lm(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni + factor(date))

# Let's also generate a random effects model
m2b <- dat %>%
  lmer(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni +
         (1 | date)) 
# Now reestimate GEE with Zelig
m2c <- dat %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
        model = "normal.gee", corstr = "exchangeable", id = "fips")

 
# Get it for PSU
# Let's also calculate this as a fixed effects model
m3a <- dat %>%
  lm(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          sci_psu + democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni + factor(date))

# Let's also generate a random effects model
m3b <- dat %>%
  lmer(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          sci_psu + democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni +
         (1 | date)) 
# Now reestimate GEE with Zelig
m3c <- dat %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          sci_psu + democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
        model = "normal.gee", corstr = "exchangeable", id = "fips")


# Get it for USJEC overall
# Let's also calculate this as a fixed effects model
m4a <- dat %>%
  lm(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          sci_usjec + democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni + factor(date))

# Let's also generate a random effects model
m4b <- dat %>%
  lmer(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          sci_usjec + democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni +
         (1 | date)) 
# Now reestimate GEE with Zelig
m4c <- dat %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          sci_usjec + democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
        model = "normal.gee", corstr = "exchangeable", id = "fips")



# Get it for USJEC subtypes
# Let's also calculate this as a fixed effects model
m5a <- dat %>%
  lm(formula = excess_deaths_5yr_period_rate ~ 
       # political variables  
       family_unity_usjec + community_health_usjec + 
       institutional_health_usjec + collective_efficacy_usjec + 
       democrat_2016 + 
       # control for mobility and pop density
       workplaces + pop_density +
       # control for health care, conditions, and behaviors
       health_care_capacity + uninsured + health_quality +
       # demographic and socioeconomics
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       # basic socioeconomics and govt capacity  
       median_household_income + some_college + employee_muni + factor(date))

# Let's also generate a random effects model
m5b <- dat %>%
  lmer(formula = excess_deaths_5yr_period_rate ~ 
         # political variables  
         family_unity_usjec + community_health_usjec + 
         institutional_health_usjec + collective_efficacy_usjec + 
         democrat_2016 + 
         # control for mobility and pop density
         workplaces + pop_density +
         # control for health care, conditions, and behaviors
         health_care_capacity + uninsured + health_quality +
         # demographic and socioeconomics
         pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
         # basic socioeconomics and govt capacity  
         median_household_income + some_college + employee_muni +
         (1 | date)) 
# Now reestimate GEE with Zelig
m5c <- dat %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
          # political variables  
          family_unity_usjec + community_health_usjec + 
          institutional_health_usjec + collective_efficacy_usjec + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
        model = "normal.gee", corstr = "exchangeable", id = "fips")



texreg::htmlreg(
  list(m1a,m1b,m1c,
       m2a,m2b,m2c,
       m3a,m3b,m3c,
       m4a,m4b,m4c,
       m5a,m5b,m5c),
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  caption.above = TRUE,
  caption = "<b>Panel OLS models of Excess Death Rates for US Counties by Time Period (n = 3416)</b><br><i>Based on an Unbalanced Panel Dataset of up to 941 counties observed between February 1 and September 26 (time periods = 6)</i>",
  custom.header = list("Using Kyne & Aldrich's<br>Social Capital Index<br>Overall Social Capital Index" = 1:3,
                       "Using Kyne & Aldrich's<br>Social Capital Index<br>Subindices by Type of Social Capital" = 4:6,
                       "Using Rupasingha et al.'s<br>Social Capital Index<br>Overall Social Capital Index" = 7:9,
                       "Using USJEC's<br>Social Capital Index<br>Overall Social Capital Index" = 10:12,
                       "Using USJEC's<br>Social Capital Index<br>Subindices by Type of Social Capital" = 13:15),
  custom.model.names = rep(c("Fixed<br>Effects", "Random<br>Effects", "GEE<br>(Exchangeable)"), 5),
  file = "viz/table_4.html",
  custom.coef.map = list(
    # Social Capital Effects
    "socialcap" = "Social Capital",
    "sci_psu" = "Social Capital",
    "sci_usjec" = "Social Capital",
    # types
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    # other types
    "family_unity_usjec" = "Family Unity Index",
    "community_health_usjec" = "Community Health Index",
    "collective_efficacy_usjec" = "Collective Efficacy Index",
    "institutional_health_usjec" = "Institutional Health Index",
    # Mobility
    "workplaces" = "Average % Change in Mobility (Workplaces)",
    # Governance
    "democrat_2016" = "% Voted Democrat in 2016 Presidential Election",
    "employee_muni" = "Municipal Employees per 1000 residents",
    # Health Care Capacity
    "health_care_capacity" = "Health Care Capacity Index",
    "uninsured" = "% Uninsured",
    # Health Conditions
    "health_quality" = "Quality of Health Index",
    # Socioeconomics
    "median_household_income" = "Median Household Income",
    "some_college" = "% Some college or more",
    # Demographics
    "pop_age_65_plus" = "% Age 65 or over",
    "pop_female" = "% Women",
    "pop_black" = "% Black",
    "pop_density" = "Population Density",
    "(Intercept)" = "Constant"),
  groups = list("<b>Social Capital</b>" = 1:8,
                "<b>Mobility</b>" = 9,
                "<b>Politics & Governance</b>" = 10:11,
                "<b>Health Care</b>" = 12:14,
                "<b>Socioeconomics</b>" = 15:16,
                "<b>Demographics</b>" = 17:20),
  custom.gof.rows = list(
    "R2 / Conditional R2" =   list(m1a,m1b,m1c,
       m2a,m2b,m2c,
       m3a,m3b,m3c,
       m4a,m4b,m4c,
       m5a,m5b,m5c) %>%
      map(~performance::r2(.)[1]) %>% unlist(),
    "Adj. R2 / Marginal R2" =   list(m1a,m1b,m1c,
       m2a,m2b,m2c,
       m3a,m3b,m3c,
       m4a,m4b,m4c,
       m5a,m5b,m5c) %>%
      map(~performance::r2(.)[2]) %>% unlist()),
  include.fstat = TRUE,
  include.aic = FALSE, include.bic = FALSE, include.loglik = FALSE,
  include.rsqu = FALSE, include.adjr = FALSE, include.groups = FALSE,
  custom.note = "*** signifies statistical significance at p < 0.001, ** at p < 0.01, * at p < 0.05, and . at p < 0.10. Effect sizes have been standardized from 1 (min) to 10 (max) and can be compared among continuous variables. All variance inflation factor (VIF) scores were below 10, the threshold for problematic colinearity, with a max of 7.5. Health quality, health care capacity, income, and family unity were broken into 8 quantiles to avoid colinearity problems.")

```

## 3.3 Balanced Panel Models

### Working Correlation

```{r}

# Identify the balanced panel
balanced <- read_rds("dataset.rds") %>% 
  select(date, fips) %>%
  group_by(fips) %>%
  count() %>%
  filter(n == 6) %>%
  select(fips) %>% unlist()


# Import data!
dat <- read_rds("dataset.rds") %>%
  # Adjust for colinearity
  mutate_at(vars(health_care_capacity,
               health_quality,
               some_college, family_unity_usjec),
          funs(ntile(., 8))) %>%
  # Format all numeric variables
  # Rescale each from 1 to 10
  mutate_at(vars(workplaces:pop_density),
            funs(scales::rescale(., to = c(1,10))))  %>%
  mutate(date = factor(date)) %>%
  arrange(fips, date) %>%
  # Filter to balanced panel
  filter(fips %in% balanced) %>%
  as.data.frame() 



# Let's identify the correct working correlation structure using the MESS package,
# we'll get an independent/unstructed one, an exchangeable one, and one with AR-1 autocorrelation,
# and we'll compare which produces the lowest quasi-likelihood estimate (QIC)

m1 <- dat %>%
  geeglm(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
         family = "gaussian", id = fips, 
         corstr = "exchangeable", std.err="san.se") 

m1b <- dat %>%
  geeglm(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
         family = "gaussian", id = fips, corstr = "ar1", std.err="san.se") 


m1c <- dat %>%
  geeglm(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
         family = "gaussian", id = fips, corstr = "independece", std.err="san.se") 


m1d <- dat %>%
  geeglm(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
         family = "gaussian", id = fips, 
         corstr = "unstructured", std.err="san.se") 


#http://finzi.psych.upenn.edu/R/library/MuMIn/html/QIC.html
list(m1,m1b,m1c,m1d) %>%
  map(~MESS::QIC(.) %>% round(0)) %>%
  bind_rows(.id = "model")

# AR-1 has the lowest. Let's go with that
remove(m1, m1b,m1c, m1d)
```

### Models

```{r}

# Identify the balanced panel
balanced <- read_rds("dataset.rds") %>% 
  select(date, fips) %>%
  group_by(fips) %>%
  count() %>%
  filter(n == 6) %>%
  select(fips) %>% unlist()


# Import data!
dat <- read_rds("dataset.rds") %>%
  # Adjust for colinearity
  mutate_at(vars(health_care_capacity,
               health_quality,
               some_college, family_unity_usjec),
          funs(ntile(., 8))) %>%
  # Format all numeric variables
  # Rescale each from 1 to 10
  mutate_at(vars(workplaces:pop_density),
            funs(scales::rescale(., to = c(1,10))))  %>%
  mutate(date = factor(date)) %>%
  arrange(fips, date) %>%
  # Filter to balanced panel
  filter(fips %in% balanced) %>%
  as.data.frame() 

       
# Let's also calculate this as a fixed effects model
m1 <- dat %>%
  lm(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni + factor(date))

# Let's also generate a random effects model
m2 <- dat %>%
  lmer(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni +
         (1 | date)) 

# Now reestimate GEE with Zelig
m3 <- dat %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
        model = "normal.gee", corstr = "AR-1", id = "fips")
  

texreg::htmlreg(
  list(m1,m2,m3),
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  caption.above = TRUE,
  caption = "<b>Balanced Panel OLS models of Excess Death Rates for US Counties by Time Period (n = 1716)</b><br><i>Based on an Balanced Panel Dataset of up to 286 counties<br>observed between February 1 and September 26 (time periods = 6)</i>",
  custom.header = list("Using Kyne & Aldrich's Social Capital Index<br>Overall Social Capital Index" = 1:3),
  custom.model.names = rep(c("Fixed<br>Effects", "Random<br>Effects", "GEE<br>(AR-1)"), 1),
  file = "viz/table_5.html",
  custom.coef.map = list(
    # Social Capital Effects
    "socialcap" = "Social Capital",
    "sci_psu" = "Social Capital",
    "sci_usjec" = "Social Capital",
    # types
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    # other types
    "family_unity_usjec" = "Family Unity Index",
    "community_health_usjec" = "Community Health Index",
    "collective_efficacy_usjec" = "Collective Efficacy Index",
    "institutional_health_usjec" = "Institutional Health Index",
    # Mobility
    "workplaces" = "Average % Change in Mobility (Workplaces)",
    # Governance
    "democrat_2016" = "% Voted Democrat in 2016 Presidential Election",
    "employee_muni" = "Municipal Employees per 1000 residents",
    # Health Care Capacity
    "health_care_capacity" = "Health Care Capacity Index",
    "uninsured" = "% Uninsured",
    # Health Conditions
    "health_quality" = "Quality of Health Index",
    # Socioeconomics
    "median_household_income" = "Median Household Income",
    "some_college" = "% Some college or more",
    # Demographics
    "pop_age_65_plus" = "% Age 65 or over",
    "pop_female" = "% Women",
    "pop_black" = "% Black",
    "pop_density" = "Population Density",
    "(Intercept)" = "Constant"),
  groups = list("<b>Social Capital</b>" = 1:3,
                "<b>Mobility</b>" = 4,
                "<b>Politics & Governance</b>" = 5:6,
                "<b>Health Care</b>" = 7:9,
                "<b>Socioeconomics</b>" = 10:11,
                "<b>Demographics</b>" = 12:16),
  custom.gof.rows = list(
    "R2 / Conditional R2" = list(m1,m2,m3) %>%
      map(~performance::r2(.)[1]) %>% unlist(),
    "Adj. R2 / Marginal R2" = list(m1,m2,m3) %>%
      map(~performance::r2(.)[2]) %>% unlist()),
  include.fstat = TRUE,
  include.aic = FALSE, include.bic = FALSE, include.loglik = FALSE,
  include.rsqu = FALSE, include.adjr = FALSE, include.groups = FALSE,
  custom.note = "*** signifies statistical significance at p < 0.001, ** at p < 0.01, * at p < 0.05, and . at p < 0.10. Effect sizes have been standardized from 1 (min) to 10 (max) and can be compared among continuous variables. All variance inflation factor (VIF) scores were below 10, the threshold for problematic colinearity. Health quality, health care capacity, and income were broken into 8 quantiles to avoid colinearity problems.")
```


### Alternatives

```{r}

# Identify the balanced panel
balanced <- read_rds("dataset.rds") %>% 
  select(date, fips) %>%
  group_by(fips) %>%
  count() %>%
  filter(n == 6) %>%
  select(fips) %>% unlist()


# Import data!
dat <- read_rds("dataset.rds") %>%
  # Adjust for colinearity
  mutate_at(vars(health_care_capacity,
               health_quality,
               some_college, family_unity_usjec),
          funs(ntile(., 8))) %>%
  # Format all numeric variables
  # Rescale each from 1 to 10
  mutate_at(vars(workplaces:pop_density),
            funs(scales::rescale(., to = c(1,10))))  %>%
  mutate(date = factor(date)) %>%
  arrange(fips, date) %>%
  # Filter to balanced panel
  filter(fips %in% balanced) %>%
  as.data.frame() 


# Get it for overall social capital for Kyne and Aldrich
# Let's also calculate this as a fixed effects model
m1a <- dat %>%
  lm(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          socialcap + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni + factor(date))

# Let's also generate a random effects model
m1b <- dat %>%
  lmer(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          socialcap + democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni +
         (1 | date)) 
# Now reestimate GEE with Zelig
m1c <- dat %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          socialcap + democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
        model = "normal.gee", corstr = "AR-1", id = "fips")

    
# Repeat for each subtype   
# Let's also calculate this as a fixed effects model
m2a <- dat %>%
  lm(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni + factor(date))

# Let's also generate a random effects model
m2b <- dat %>%
  lmer(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni +
         (1 | date)) 
# Now reestimate GEE with Zelig
m2c <- dat %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          bonding + bridging + linking + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
        model = "normal.gee", corstr = "AR-1", id = "fips")

 
# Get it for PSU
# Let's also calculate this as a fixed effects model
m3a <- dat %>%
  lm(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          sci_psu + democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni + factor(date))

# Let's also generate a random effects model
m3b <- dat %>%
  lmer(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          sci_psu + democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni +
         (1 | date)) 
# Now reestimate GEE with Zelig
m3c <- dat %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          sci_psu + democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
        model = "normal.gee", corstr = "AR-1", id = "fips")


# Get it for USJEC overall
# Let's also calculate this as a fixed effects model
m4a <- dat %>%
  lm(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          sci_usjec + democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni + factor(date))

# Let's also generate a random effects model
m4b <- dat %>%
  lmer(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          sci_usjec + democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni +
         (1 | date)) 
# Now reestimate GEE with Zelig
m4c <- dat %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
           # political variables  
          sci_usjec + democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
        model = "normal.gee", corstr = "AR-1", id = "fips")



# Get it for USJEC subtypes
# Let's also calculate this as a fixed effects model
m5a <- dat %>%
  lm(formula = excess_deaths_5yr_period_rate ~ 
       # political variables  
       family_unity_usjec + community_health_usjec + 
       institutional_health_usjec + collective_efficacy_usjec + 
       democrat_2016 + 
       # control for mobility and pop density
       workplaces + pop_density +
       # control for health care, conditions, and behaviors
       health_care_capacity + uninsured + health_quality +
       # demographic and socioeconomics
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       # basic socioeconomics and govt capacity  
       median_household_income + some_college + employee_muni + factor(date))

# Let's also generate a random effects model
m5b <- dat %>%
  lmer(formula = excess_deaths_5yr_period_rate ~ 
         # political variables  
         family_unity_usjec + community_health_usjec + 
         institutional_health_usjec + collective_efficacy_usjec + 
         democrat_2016 + 
         # control for mobility and pop density
         workplaces + pop_density +
         # control for health care, conditions, and behaviors
         health_care_capacity + uninsured + health_quality +
         # demographic and socioeconomics
         pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
         # basic socioeconomics and govt capacity  
         median_household_income + some_college + employee_muni +
         (1 | date)) 
# Now reestimate GEE with Zelig
m5c <- dat %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
          # political variables  
          family_unity_usjec + community_health_usjec + 
          institutional_health_usjec + collective_efficacy_usjec + 
          democrat_2016 + 
          # control for mobility and pop density
          workplaces + pop_density +
          # control for health care, conditions, and behaviors
          health_care_capacity + uninsured + health_quality +
          # demographic and socioeconomics
          pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
          # basic socioeconomics and govt capacity  
          median_household_income + some_college + employee_muni, 
        model = "normal.gee", corstr = "AR-1", id = "fips")



texreg::htmlreg(
  list(m1a,m1b,m1c,
       m2a,m2b,m2c,
       m3a,m3b,m3c,
       m4a,m4b,m4c,
       m5a,m5b,m5c),
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  caption.above = TRUE,
  caption = "<b>Panel OLS models of Excess Death Rates for US Counties by Time Period (n = 1716)</b><br><i>Based on a Balanced Panel Dataset of up to 286 counties<br>observed between February 1 and September 26 (time periods = 6)</i>",
  custom.header = list("Using Kyne & Aldrich's<br>Social Capital Index<br>Overall Social Capital Index" = 1:3,
                       "Using Kyne & Aldrich's<br>Social Capital Index<br>Subindices by Type of Social Capital" = 4:6,
                       "Using Rupasingha et al.'s<br>Social Capital Index<br>Overall Social Capital Index" = 7:9,
                       "Using USJEC's<br>Social Capital Index<br>Overall Social Capital Index" = 10:12,
                       "Using USJEC's<br>Social Capital Index<br>Subindices by Type of Social Capital" = 13:15),
  custom.model.names = rep(c("Fixed<br>Effects", "Random<br>Effects", "GEE<br>(AR-1)"), 5),
  file = "viz/table_6.html",
  custom.coef.map = list(
    # Social Capital Effects
    "socialcap" = "Social Capital",
    "sci_psu" = "Social Capital",
    "sci_usjec" = "Social Capital",
    # types
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    # other types
    "family_unity_usjec" = "Family Unity Index",
    "community_health_usjec" = "Community Health Index",
    "collective_efficacy_usjec" = "Collective Efficacy Index",
    "institutional_health_usjec" = "Institutional Health Index",
    # Mobility
    "workplaces" = "Average % Change in Mobility (Workplaces)",
    # Governance
    "democrat_2016" = "% Voted Democrat in 2016 Presidential Election",
    "employee_muni" = "Municipal Employees per 1000 residents",
    # Health Care Capacity
    "health_care_capacity" = "Health Care Capacity Index",
    "uninsured" = "% Uninsured",
    # Health Conditions
    "health_quality" = "Quality of Health Index",
    # Socioeconomics
    "median_household_income" = "Median Household Income",
    "some_college" = "% Some college or more",
    # Demographics
    "pop_age_65_plus" = "% Age 65 or over",
    "pop_female" = "% Women",
    "pop_black" = "% Black",
    "pop_density" = "Population Density",
    "(Intercept)" = "Constant"),
  groups = list("<b>Social Capital</b>" = 1:8,
                "<b>Mobility</b>" = 9,
                "<b>Politics & Governance</b>" = 10:11,
                "<b>Health Care</b>" = 12:14,
                "<b>Socioeconomics</b>" = 15:16,
                "<b>Demographics</b>" = 17:20),
  custom.gof.rows = list(
    "R2 / Conditional R2" =   list(m1a,m1b,m1c,
       m2a,m2b,m2c,
       m3a,m3b,m3c,
       m4a,m4b,m4c,
       m5a,m5b,m5c) %>%
      map(~performance::r2(.)[1]) %>% unlist(),
    "Adj. R2 / Marginal R2" =   list(m1a,m1b,m1c,
       m2a,m2b,m2c,
       m3a,m3b,m3c,
       m4a,m4b,m4c,
       m5a,m5b,m5c) %>%
      map(~performance::r2(.)[2]) %>% unlist()),
  include.fstat = TRUE,
  include.aic = FALSE, include.bic = FALSE, include.loglik = FALSE,
  include.rsqu = FALSE, include.adjr = FALSE, include.groups = FALSE,
  custom.note = "*** signifies statistical significance at p < 0.001, ** at p < 0.01, * at p < 0.05, and . at p < 0.10. Effect sizes have been standardized from 1 (min) to 10 (max) and can be compared among continuous variables. All variance inflation factor (VIF) scores were below 10, the threshold for problematic colinearity, with a max of 7.5. Health quality, health care capacity, income, and family unity were broken into 8 quantiles to avoid colinearity problems.")

remove(m1a,m1b,m1c,
       m2a,m2b,m2c,
       m3a,m3b,m3c,
       m4a,m4b,m4c,
       m5a,m5b,m5c)
```

# 4. Matched Models

## 4.1 Match by Social Capital

```{r}
library(tidyverse)
library(plm)


# Identify the balanced panel
balanced <- read_rds("dataset.rds") %>% 
  select(date, fips) %>%
  group_by(fips) %>%
  count() %>%
  filter(n == 6) %>%
  select(fips) %>% unlist()


# Import data!
dat <- read_rds("dataset.rds") %>%
  # Adjust for colinearity
#  mutate_at(vars(health_care_capacity,
#               health_quality,
#               some_college, family_unity_usjec),
#          funs(ntile(., 8))) %>%
  # Format all numeric variables
  # Rescale each from 1 to 10
  mutate_at(vars(workplaces:pop_density),
            funs(scales::rescale(., to = c(1,10))))  %>%
  mutate(date = factor(date)) %>%
  arrange(fips, date) %>%
  # Filter to balanced panel
  filter(fips %in% balanced) %>%
  as.data.frame() 

# Match using coarsened exact matching,
# to get counties as similar as possible except for their level of overall social capital
matched_any <- dat %>%
  filter(date == "2020-04-25") %>%
  matchit(formula = socialcap_cat ~ pop_density + 
            median_household_income + some_college +
            pop_age_65_plus + pop_black, # pop_hisp, 
          method = "cem") 

# as similar as possible except all three indices are above median or not
matched_all <- dat %>%
  filter(date == "2020-04-25") %>%
  matchit(formula = all_cat ~ pop_density + 
            median_household_income + some_college +
            pop_age_65_plus + pop_black, # + pop_hisp, 
          method = "cem") 

# as similar as possible except bonding index is above median, or not
matched_bonding <- dat %>%
  filter(date == "2020-04-25") %>%
  matchit(formula = bonding_cat ~ pop_density + 
            median_household_income + some_college +
            pop_age_65_plus + pop_black, # + pop_hisp, 
          method = "cem") 
# as similar as possible except bridging index is above median, or not
matched_bridging <- dat %>%
  filter(date == "2020-04-25") %>%
  matchit(formula = bridging_cat ~ pop_density + 
            median_household_income + some_college +
            pop_age_65_plus + pop_black, # + pop_hisp, 
          method = "cem") 
# as similar as possible except linking index is above median, or not
matched_linking <- dat %>%
  filter(date == "2020-04-25") %>%
  matchit(formula = linking_cat ~ pop_density + 
            median_household_income + some_college +
            pop_age_65_plus + pop_black, # + pop_hisp, 
          method = "cem") 
# as similar as possible except PSU social capital index is above median, or not
matched_psu <- dat %>%
  filter(date == "2020-04-25") %>%
  matchit(formula = sci_psu_cat ~ pop_density + 
            median_household_income + some_college +
            pop_age_65_plus + pop_black, # + pop_hisp, 
          method = "cem")
# as similar as possible except USJEC social capital index is above median, or not
matched_usjec <- dat %>%
  filter(date == "2020-04-25") %>%
  matchit(formula = sci_usjec_cat ~ pop_density + 
            median_household_income + some_college +
            pop_age_65_plus + pop_black, # + pop_hisp, 
          method = "cem")
# as similar as possible except USJEC social capital index is above median, or not
matched_family_unity <- dat %>%
  filter(date == "2020-04-25") %>%
  matchit(formula = family_unity_usjec_cat ~ pop_density + 
            median_household_income + some_college +
            pop_age_65_plus + pop_black, # + pop_hisp, 
          method = "cem")
# as similar as possible except USJEC social capital index is above median, or not
matched_community_health <- dat %>%
  filter(date == "2020-04-25") %>%
  matchit(formula = community_health_usjec_cat ~ pop_density + 
            median_household_income + some_college +
            pop_age_65_plus + pop_black, # + pop_hisp, 
          method = "cem")

# as similar as possible except USJEC social capital index is above median, or not
matched_institutional_health <- dat %>%
  filter(date == "2020-04-25") %>%
  matchit(formula = institutional_health_usjec_cat ~ pop_density + 
            median_household_income + some_college +
            pop_age_65_plus + pop_black, # + pop_hisp, 
          method = "cem")
# as similar as possible except USJEC social capital index is above median, or not
matched_collective_efficacy <- dat %>%
  filter(date == "2020-04-25") %>%
  matchit(formula = collective_efficacy_usjec_cat ~ pop_density + 
            median_household_income + some_college +
            pop_age_65_plus + pop_black, # + pop_hisp, 
          method = "cem")

dir.create("analysis")
# Let's see how many were matched, in each case.
list(matched_any, matched_all,
     matched_bonding, matched_bridging, matched_linking,
     matched_psu, matched_usjec, 
     matched_family_unity, matched_community_health,
     matched_institutional_health, matched_collective_efficacy) %>%
  saveRDS("analysis/matched_models.rds")

# Quite a number!
remove(matched_any, matched_all,
       matched_bonding, matched_bridging, 
       matched_linking, matched_psu, 
       matched_usjec, matched_family_unity,
       matched_community_health,
       matched_institutional_health,
       matched_collective_efficacy)



get_balance = function(matchmodel){
  bind_rows(
    summary(matchmodel)[3] %>%
      as.data.frame() %>%
      tibble::rownames_to_column(var = "term") %>%
      select(term, std_mean_diff = 4) %>%
      mutate(type = "Unmatched"),
    summary(matchmodel)[4] %>%
      as.data.frame() %>%
      tibble::rownames_to_column(var = "term") %>%
      select(term, std_mean_diff = 4) %>%
      mutate(type = "Matched")) %>%
    mutate(term = term %>% dplyr::recode_factor(
      "pop_density" = "Population\nDensity",
      "median_household_income" = "Median Household\nIncome",
      "some_college" = "(%) Some College",
      "pop_age_65_plus" = "(%) Age 65+",
      "pop_black" = "(%) Black")) %>%
    return()
}

viz <- read_rds("analysis/matched_models.rds") %>%
  map_dfr(~get_balance(.), .id = "model") %>%
  mutate(group = paste(type, model)) %>%
  mutate(model = model %>% recode_factor(
    "1" = "Social Capital\n(Overall)\n(Kyne & Aldrich)",
    "2" = "Social Capital\n(All Subtypes)\n(Kyne & Aldrich)",
    "3" = "Bonding\nSocial Capital\n(Kyne & Aldrich)",
    "4" = "Bridging\nSocial Capital\n(Kyne & Aldrich)",
    "5" = "Linking\nSocial Capital\n(Kyne & Aldrich)",
    "6" = "Social Capital\n(Rupasingha et al.)",
    "7" = "Social Capital\n(USJEC)",
    "8" = "Family\nUnity\n(USJEC)",
    "9" = "Community\nHealth\n(USJEC)",
    "10" = "Institutional\nHealth\n(USJEC)",
    "11" = "Collective\nEfficacy\n(USJEC)")) 

g1 <- viz %>%
  filter(str_detect(model, "Kyne & Aldrich")) %>%
  ggplot(mapping = aes(x = term, y = abs(std_mean_diff), color = type, group = group)) +
  geom_hline(yintercept = 0.1, color = "black", linetype = "dashed") + 
  geom_line() +
  geom_point(size = 2) +
  facet_grid(~model, scales = "free") +
  theme_classic(base_size = 14) +
  theme(legend.position = "bottom",
        panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.5, "cm")) +
  scale_color_manual(values = c("#FFB000", "#648FFF")) +
  labs(x = NULL, y = NULL, color = "Sample") +
  coord_flip()

g2 <- viz %>%
  filter(str_detect(model, "Kyne & Aldrich", negate = TRUE)) %>%
  ggplot(mapping = aes(x = term, y = abs(std_mean_diff), color = type, group = group)) +
  geom_hline(yintercept = 0.1, color = "black", linetype = "dashed") + 
  geom_line() +
  geom_point(size = 2) +
  facet_grid(~model, scales = "free") +
  theme_classic(base_size = 14) +
  theme(legend.position = "bottom",
        panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.5, "cm")) +
  scale_color_manual(values = c("#FFB000", "#648FFF")) +
  labs(x = NULL, y = "Absolute Standardized Mean Difference\nbetween Treatment and Control Group for specified covariate",
       color = "Sample") +
  coord_flip()

ggpubr::ggarrange(g1,g2, nrow = 2, common.legend = TRUE, legend = "bottom") +
  ggsave("viz/figure_A1.png", dpi = 500, width = 10, height = 7)

remove(dat, balanced, viz, g1,g2)
```



## 4.2 Panel Models

### Main Models

```{r}

# Identify the balanced panel
balanced <- read_rds("dataset.rds") %>% 
  select(date, fips) %>%
  group_by(fips) %>%
  count() %>%
  filter(n == 6) %>%
  select(fips) %>% unlist()


# Import data!
dat <- read_rds("dataset.rds") %>%
  # Adjust for colinearity
  mutate_at(vars(health_care_capacity,
               health_quality, workplaces, pop_female,pop_black,
               some_college, family_unity_usjec),
          funs(ntile(., 8))) %>%
  mutate(median_household_income = ntile(median_household_income, 6)) %>%
  # Format all numeric variables
  # Rescale each from 1 to 10
  mutate_at(vars(workplaces:pop_density),
            funs(scales::rescale(., to = c(1,10))))  %>%
  mutate(date = factor(date)) %>%
  arrange(fips, date) %>%
  # Filter to balanced panel
  filter(fips %in% balanced) %>%
  as.data.frame() 

# Write a function to extract matched data
get_matched_data = function(matchmodel){
  dat %>%
    # Join into it
    left_join(by = c("fips"),
              # The coarsened exact matching weights
              y = dat %>%
                filter(date == "2020-04-25") %>%
                # From the specified matched sample
                mutate(weights = matchmodel$weights) %>%
                select(fips, weights)) %>%
    filter(weights > 0) %>% 
    mutate(label = matchmodel$formula[2] %>% as.character() %>% str_remove("_cat[(][)]")) %>%
    return()
}

# Get the matched datasets
datm <- read_rds("analysis/matched_models.rds") %>%
  map_dfr(~get_matched_data(.)) 


m1a <- datm %>%
  filter(label == "socialcap_cat") %>%
      lm(formula = excess_deaths_5yr_period_rate ~ 
           socialcap +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           factor(date), weights = weights)
m1b <- datm %>%
  filter(label == "socialcap_cat") %>%
      lmer(formula = excess_deaths_5yr_period_rate ~ 
           socialcap +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           (1 | date), weights = weights)
m1c <- datm %>%
  filter(label == "socialcap_cat") %>%
      zelig(formula = excess_deaths_5yr_period_rate ~ 
           socialcap +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + employee_muni, #some_college
           model = "normal.gee", corstr = "exchangeable", id = "fips",
           weights = "weights")
# Virtually the same results as AR-1



m2a <- datm %>%
  filter(label == "socialcap_cat") %>%
      lm(formula = excess_deaths_5yr_period_rate ~ 
           bonding + bridging + linking +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           factor(date), weights = weights)
m2b <- datm %>%
  filter(label == "socialcap_cat") %>%
      lmer(formula = excess_deaths_5yr_period_rate ~ 
           bonding + bridging + linking +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           (1 | date), weights = weights)
m2c <- datm %>%
  filter(label == "socialcap_cat") %>%
      zelig(formula = excess_deaths_5yr_period_rate ~ 
           bonding + bridging + linking +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + employee_muni, # some_college
           model = "normal.gee", corstr = "exchangeable", id = "fips",
           weights = "weights")




m3a <- datm %>%
  filter(label == "all_cat") %>%
      lm(formula = excess_deaths_5yr_period_rate ~ 
           bonding + bridging + linking +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           factor(date), weights = weights)
m3b <- datm %>%
  filter(label == "all_cat") %>%
      lmer(formula = excess_deaths_5yr_period_rate ~ 
           bonding + bridging + linking +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           (1 | date), weights = weights)
m3c <- datm %>%
  filter(label == "all_cat") %>%
      zelig(formula = excess_deaths_5yr_period_rate ~ 
           bonding + bridging + linking +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + employee_muni, # some_college
           model = "normal.gee", corstr = "exchangeable", id = "fips",
           weights = "weights")




m4a <- datm %>%
  filter(label == "bonding_cat") %>%
      lm(formula = excess_deaths_5yr_period_rate ~ 
           bonding + bridging + linking +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           factor(date), weights = weights)
m4b <- datm %>%
  filter(label == "bonding_cat") %>%
      lmer(formula = excess_deaths_5yr_period_rate ~ 
           bonding + bridging + linking +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           (1 | date), weights = weights)
m4c <- datm %>%
  filter(label == "bonding_cat") %>%
      zelig(formula = excess_deaths_5yr_period_rate ~ 
           bonding + bridging + linking +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + employee_muni, #some_college
           model = "normal.gee", corstr = "exchangeable", id = "fips",
           weights = "weights")

m5a <- datm %>%
  filter(label == "bridging_cat") %>%
      lm(formula = excess_deaths_5yr_period_rate ~ 
           bonding + bridging + linking +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           factor(date), weights = weights)
m5b <- datm %>%
  filter(label == "bridging_cat") %>%
      lmer(formula = excess_deaths_5yr_period_rate ~ 
           bonding + bridging + linking +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           (1 | date), weights = weights)
m5c <- datm %>%
  filter(label == "bridging_cat") %>%
      zelig(formula = excess_deaths_5yr_period_rate ~ 
           bonding + bridging + linking +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + employee_muni, #+ some_college,
           model = "normal.gee", corstr = "exchangeable", id = "fips",
           weights = "weights")



m6a <- datm %>%
  filter(label == "linking_cat") %>%
      lm(formula = excess_deaths_5yr_period_rate ~ 
           bonding + bridging + linking +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           factor(date), weights = weights)
m6b <- datm %>%
  filter(label == "linking_cat") %>%
      lmer(formula = excess_deaths_5yr_period_rate ~ 
           bonding + bridging + linking +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           (1 | date), weights = weights)
m6c <- datm %>%
  filter(label == "linking_cat") %>%
      zelig(formula = excess_deaths_5yr_period_rate ~ 
           bonding + bridging + linking +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + employee_muni, #some_college,
           model = "normal.gee", corstr = "exchangeable", id = "fips",
           weights = "weights")
# SOME COLLEGE HAD TO BE REMOVED FOR FINAL MODEL

list(m1a,m1b,from_zelig_model(m1c),
     m2a,m2b,from_zelig_model(m2c),
     m3a,m3b,from_zelig_model(m3c),
     m4a,m4b,from_zelig_model(m4c),
     m5a,m5b,from_zelig_model(m5c),
     m6a,m6b,from_zelig_model(m6c)) %>%
  map(~car::vif(.))

modellist <- list(m1a,m1b,m1c,
     m2a,m2b,m2c,
     m3a,m3b,m3c)

texreg::htmlreg(
  modellist,
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  caption.above = TRUE,
  caption = "<b>Matched Balanced Panel OLS models of Excess Death Rates for US Counties by Time Period</b><br><i>Based on Balanced Panel Dataset of up to 286 counties, matched to test the effect of each type of social capital using Coarsened Exact Matching<br>observed between February 1 and September 26 (time periods = 6)</i><br><b>Using Kyne & Aldrich's Social Capital Indices</b>",
  custom.header = list("Overall Social Capital<br>(Matched Overall)" = 1:3,
                       "Social Capital Subtypes<br>(Matched Overall)" = 4:6,
                       "Social Capital Subtypes<br>(Matched for All)" = 7:9),
  custom.model.names = rep(c("Fixed<br>Effects", "Random<br>Effects", "GEE<br>(Exchangeable)"), 3),
  file = "viz/table_7.html",
  custom.coef.map = list(
    # Social Capital Effects
    "socialcap" = "Social Capital",
    "sci_psu" = "Social Capital",
    "sci_usjec" = "Social Capital",
    # types
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    # other types
    "family_unity_usjec" = "Family Unity Index",
    "community_health_usjec" = "Community Health Index",
    "collective_efficacy_usjec" = "Collective Efficacy Index",
    "institutional_health_usjec" = "Institutional Health Index",
    # Mobility
    "workplaces" = "Average % Change in Mobility (Workplaces)",
    # Governance
    "democrat_2016" = "% Voted Democrat in 2016 Presidential Election",
    "employee_muni" = "Municipal Employees per 1000 residents",
    # Health Care Capacity
    "health_care_capacity" = "Health Care Capacity Index",
    "uninsured" = "% Uninsured",
    # Health Conditions
    "health_quality" = "Quality of Health Index",
    # Socioeconomics
    "median_household_income" = "Median Household Income",
    "some_college" = "% Some college or more",
    # Demographics
    "pop_age_65_plus" = "% Age 65 or over",
    "pop_female" = "% Women",
    "pop_black" = "% Black",
    "pop_density" = "Population Density",
    "(Intercept)" = "Constant"),
  groups = list("<b>Social Capital</b>" = 1:4,
                "<b>Mobility</b>" = 5,
                "<b>Politics & Governance</b>" = 6:7,
                "<b>Health Care</b>" = 8:10,
                "<b>Socioeconomics</b>" = 11:12,
                "<b>Demographics</b>" = 13:15),
  custom.gof.rows = list(
    "R2 / Conditional R2" = modellist %>%
      map(~performance::r2(.)[1]) %>% unlist(),
    "Adj. R2 / Marginal R2" = modellist %>%
      map(~performance::r2(.)[2]) %>% unlist()),
  include.fstat = TRUE,
  include.aic = FALSE, include.bic = FALSE, include.loglik = FALSE,
  include.rsqu = FALSE, include.adjr = FALSE, include.groups = FALSE,
  custom.note = "*** signifies statistical significance at p < 0.001, ** at p < 0.01, * at p < 0.05, and . at p < 0.10. Effect sizes have been standardized from 1 (min) to 10 (max) and can be compared among continuous variables. All variance inflation factor (VIF) scores were below 10, the threshold for problematic colinearity. Health quality, health care capacity, % women, % black, workplace mobility, college education rates, and family unity were broken into 8 quantiles to avoid colinearity problems. Income was broken into 6 quantiles. In GEE models, college education rates were removed due to colinearity.")

remove(modellist,
       m1a,m1b,m1c,
     m2a,m2b,m2c,
     m3a,m3b,m3c)


# Now repeat for the individual subtypes

modellist <- list(m4a,m4b,m4c,
     m5a,m5b,m5c,
     m6a,m6b,m6c)

texreg::htmlreg(
  modellist,
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  caption.above = TRUE,
  caption = "<b>Matched Balanced Panel OLS models of Excess Death Rates for US Counties by Time Period</b><br><i>Based on Balanced Panel Dataset of up to 286 counties, matched to test the effect of each type of social capital using Coarsened Exact Matching<br>observed between February 1 and September 26 (time periods = 6)</i><br><b>Using Kyne & Aldrich's Social Capital Indices</b>",
  custom.header = list("Bonding Social Capital<br>(Matched)" = 1:3,
                       "Bridging Capital Subtypes<br>(Matched)" = 4:6,
                       "Linking Capital Subtypes<br>(Matched)" = 7:9),
  custom.model.names = rep(c("Fixed<br>Effects", "Random<br>Effects", "GEE<br>(Exchangeable)"), 3),
  file = "viz/table_8.html",
  custom.coef.map = list(
    # Social Capital Effects
    "socialcap" = "Social Capital",
    "sci_psu" = "Social Capital",
    "sci_usjec" = "Social Capital",
    # types
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    # other types
    "family_unity_usjec" = "Family Unity Index",
    "community_health_usjec" = "Community Health Index",
    "collective_efficacy_usjec" = "Collective Efficacy Index",
    "institutional_health_usjec" = "Institutional Health Index",
    # Mobility
    "workplaces" = "Average % Change in Mobility (Workplaces)",
    # Governance
    "democrat_2016" = "% Voted Democrat in 2016 Presidential Election",
    "employee_muni" = "Municipal Employees per 1000 residents",
    # Health Care Capacity
    "health_care_capacity" = "Health Care Capacity Index",
    "uninsured" = "% Uninsured",
    # Health Conditions
    "health_quality" = "Quality of Health Index",
    # Socioeconomics
    "median_household_income" = "Median Household Income",
    "some_college" = "% Some college or more",
    # Demographics
    "pop_age_65_plus" = "% Age 65 or over",
    "pop_female" = "% Women",
    "pop_black" = "% Black",
    "pop_density" = "Population Density",
    "(Intercept)" = "Constant"),
  groups = list("<b>Social Capital</b>" = 1:3,
                "<b>Mobility</b>" = 4,
                "<b>Politics & Governance</b>" = 5:6,
                "<b>Health Care</b>" = 7:9,
                "<b>Socioeconomics</b>" = 10:11,
                "<b>Demographics</b>" = 12:14),
  custom.gof.rows = list(
    "R2 / Conditional R2" = modellist %>%
      map(~performance::r2(.)[1]) %>% unlist(),
    "Adj. R2 / Marginal R2" = modellist %>%
      map(~performance::r2(.)[2]) %>% unlist()),
  include.fstat = TRUE,
  include.aic = FALSE, include.bic = FALSE, include.loglik = FALSE,
  include.rsqu = FALSE, include.adjr = FALSE, include.groups = FALSE,
  custom.note = "*** signifies statistical significance at p < 0.001, ** at p < 0.01, * at p < 0.05, and . at p < 0.10. Effect sizes have been standardized from 1 (min) to 10 (max) and can be compared among continuous variables. All variance inflation factor (VIF) scores were below 10, the threshold for problematic colinearity. Health quality, health care capacity, % women, % black, workplace mobility, college education rates, and family unity were broken into 8 quantiles to avoid colinearity problems. Income was broken into 6 quantiles. In GEE models, college education rates were removed due to colinearity.")


remove(modellist, m4a,m4b,m4c,
     m5a,m5b,m5c,
     m6a,m6b,m6c)
```

### Alternatives

```{r}

# Identify the balanced panel
balanced <- read_rds("dataset.rds") %>% 
  select(date, fips) %>%
  group_by(fips) %>%
  count() %>%
  filter(n == 6) %>%
  select(fips) %>% unlist()


# Import data!
dat <- read_rds("dataset.rds") %>%
  # Adjust for colinearity
  mutate_at(vars(health_care_capacity,
               health_quality, workplaces, pop_female,pop_black,
               some_college, family_unity_usjec),
          funs(ntile(., 8))) %>%
  mutate(median_household_income = ntile(median_household_income, 6)) %>%
  # Format all numeric variables
  # Rescale each from 1 to 10
  mutate_at(vars(workplaces:pop_density),
            funs(scales::rescale(., to = c(1,10))))  %>%
  mutate(date = factor(date)) %>%
  arrange(fips, date) %>%
  # Filter to balanced panel
  filter(fips %in% balanced) %>%
  as.data.frame() 

# Write a function to extract matched data
get_matched_data = function(matchmodel){
  dat %>%
    # Join into it
    left_join(by = c("fips"),
              # The coarsened exact matching weights
              y = dat %>%
                filter(date == "2020-04-25") %>%
                # From the specified matched sample
                mutate(weights = matchmodel$weights) %>%
                select(fips, weights)) %>%
    filter(weights > 0) %>% 
    mutate(label = matchmodel$formula[2] %>% as.character() %>% str_remove("_cat[(][)]")) %>%
    return()
}

# Get the matched datasets
datm <- read_rds("analysis/matched_models.rds") %>%
  map_dfr(~get_matched_data(.)) 


m1a <- datm %>%
  filter(label == "sci_psu_cat") %>%
      lm(formula = excess_deaths_5yr_period_rate ~ 
           sci_psu +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           factor(date), weights = weights)
m1b <- datm %>%
  filter(label == "sci_psu_cat") %>%
      lmer(formula = excess_deaths_5yr_period_rate ~ 
           sci_psu +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           (1 | date), weights = weights)
m1c <- datm %>%
  filter(label == "sci_psu_cat") %>%
      zelig(formula = excess_deaths_5yr_period_rate ~ 
           sci_psu +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + employee_muni, #some_college
           model = "normal.gee", corstr = "exchangeable", id = "fips",
           weights = "weights")
# Virtually the same results as AR-1



m2a <- datm %>%
  filter(label == "sci_usjec_cat") %>%
      lm(formula = excess_deaths_5yr_period_rate ~ 
           sci_usjec +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           factor(date), weights = weights)
m2b <- datm %>%
  filter(label == "sci_usjec_cat") %>%
      lmer(formula = excess_deaths_5yr_period_rate ~ 
           sci_usjec +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           (1 | date), weights = weights)
m2c <- datm %>%
  filter(label == "sci_usjec_cat") %>%
      zelig(formula = excess_deaths_5yr_period_rate ~ 
           sci_usjec +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + employee_muni, # some_college
           model = "normal.gee", corstr = "exchangeable", id = "fips",
           weights = "weights")


m3a <- datm %>%
  filter(label == "family_unity_usjec_cat") %>%
      lm(formula = excess_deaths_5yr_period_rate ~ 
           family_unity_usjec + community_health_usjec  +
           institutional_health_usjec + collective_efficacy_usjec +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           factor(date), weights = weights)
m3b <- datm %>%
  filter(label == "family_unity_usjec_cat") %>%
      lmer(formula = excess_deaths_5yr_period_rate ~ 
           family_unity_usjec + community_health_usjec  +
           institutional_health_usjec + collective_efficacy_usjec +
             democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           (1 | date), weights = weights)
m3c <- datm %>%
  filter(label == "family_unity_usjec_cat") %>%
      zelig(formula = excess_deaths_5yr_period_rate ~ 
           family_unity_usjec + community_health_usjec  +
           institutional_health_usjec + collective_efficacy_usjec +
             democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + employee_muni, # some_college
           model = "normal.gee", corstr = "exchangeable", id = "fips",
           weights = "weights")





m4a <- datm %>%
  filter(label == "community_health_usjec_cat") %>%
      lm(formula = excess_deaths_5yr_period_rate ~ 
           family_unity_usjec + community_health_usjec  +
           institutional_health_usjec + collective_efficacy_usjec +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           factor(date), weights = weights)
m4b <- datm %>%
  filter(label == "community_health_usjec_cat") %>%
      lmer(formula = excess_deaths_5yr_period_rate ~ 
           family_unity_usjec + community_health_usjec  +
           institutional_health_usjec + collective_efficacy_usjec +
             democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           (1 | date), weights = weights)
m4c <- datm %>%
  filter(label == "community_health_usjec_cat") %>%
      zelig(formula = excess_deaths_5yr_period_rate ~ 
           family_unity_usjec + community_health_usjec  +
           institutional_health_usjec + collective_efficacy_usjec +
             democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + employee_muni, # some_college
           model = "normal.gee", corstr = "exchangeable", id = "fips",
           weights = "weights")


m5a <- datm %>%
  filter(label == "institutional_health_usjec_cat") %>%
      lm(formula = excess_deaths_5yr_period_rate ~ 
           family_unity_usjec + community_health_usjec  +
           institutional_health_usjec + collective_efficacy_usjec +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           factor(date), weights = weights)
m5b <- datm %>%
  filter(label == "institutional_health_usjec_cat") %>%
      lmer(formula = excess_deaths_5yr_period_rate ~ 
           family_unity_usjec + community_health_usjec  +
           institutional_health_usjec + collective_efficacy_usjec +
             democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           (1 | date), weights = weights)
m5c <- datm %>%
  filter(label == "institutional_health_usjec_cat") %>%
      zelig(formula = excess_deaths_5yr_period_rate ~ 
           family_unity_usjec + community_health_usjec  +
           institutional_health_usjec + collective_efficacy_usjec +
             democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + employee_muni, # some_college
           model = "normal.gee", corstr = "exchangeable", id = "fips",
           weights = "weights")





m6a <- datm %>%
  filter(label == "collective_efficacy_usjec_cat") %>%
      lm(formula = excess_deaths_5yr_period_rate ~ 
           family_unity_usjec + community_health_usjec  +
           institutional_health_usjec + collective_efficacy_usjec +
           democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           factor(date), weights = weights)
m6b <- datm %>%
  filter(label == "collective_efficacy_usjec_cat") %>%
      lmer(formula = excess_deaths_5yr_period_rate ~ 
           family_unity_usjec + community_health_usjec  +
           institutional_health_usjec + collective_efficacy_usjec +
             democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + some_college + employee_muni + 
           (1 | date), weights = weights)
m6c <- datm %>%
  filter(label == "collective_efficacy_usjec_cat") %>%
      zelig(formula = excess_deaths_5yr_period_rate ~ 
           family_unity_usjec + community_health_usjec  +
           institutional_health_usjec + collective_efficacy_usjec +
             democrat_2016 + workplaces + pop_density +
           health_care_capacity + uninsured + health_quality +
           pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
           median_household_income + employee_muni, # some_college
           model = "normal.gee", corstr = "exchangeable", id = "fips",
           weights = "weights")
# SOME COLLEGE HAD TO BE REMOVED FOR FINAL MODEL

list(m1a,m1b,from_zelig_model(m1c),
     m2a,m2b,from_zelig_model(m2c),
     m3a,m3b,from_zelig_model(m3c),
     m4a,m4b,from_zelig_model(m4c),
     m5a,m5b,from_zelig_model(m5c),
     m6a,m6b,from_zelig_model(m6c)) %>%
  map(~car::vif(.))

modellist <- list(m1a,m1b,m1c,
     m2a,m2b,m2c)

texreg::htmlreg(
  modellist,
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  caption.above = TRUE,
  caption = "<b>Matched Balanced Panel OLS models of Excess Death Rates for US Counties by Time Period</b><br><i>Based on Balanced Panel Dataset of up to 286 counties, matched to test the effect of each type of social capital using Coarsened Exact Matching<br>observed between February 1 and September 26 (time periods = 6)</i><br><b>Using Alternative Social Capital Indices</b>",
  custom.header = list("Social Capital<br>(Rupasingha et al.)<br>(Matched)" = 1:3,
                       "Social Capital<br>(USJEC)<br>(Matched)" = 4:6),
  custom.model.names = rep(c("Fixed<br>Effects", "Random<br>Effects", "GEE<br>(Exchangeable)"), 2),
  file = "viz/table_9.html",
  custom.coef.map = list(
    # Social Capital Effects
    "socialcap" = "Social Capital",
    "sci_psu" = "Social Capital",
    "sci_usjec" = "Social Capital",
    # types
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    # other types
    "family_unity_usjec" = "Family Unity Index",
    "community_health_usjec" = "Community Health Index",
    "collective_efficacy_usjec" = "Collective Efficacy Index",
    "institutional_health_usjec" = "Institutional Health Index",
    # Mobility
    "workplaces" = "Average % Change in Mobility (Workplaces)",
    # Governance
    "democrat_2016" = "% Voted Democrat in 2016 Presidential Election",
    "employee_muni" = "Municipal Employees per 1000 residents",
    # Health Care Capacity
    "health_care_capacity" = "Health Care Capacity Index",
    "uninsured" = "% Uninsured",
    # Health Conditions
    "health_quality" = "Quality of Health Index",
    # Socioeconomics
    "median_household_income" = "Median Household Income",
    "some_college" = "% Some college or more",
    # Demographics
    "pop_age_65_plus" = "% Age 65 or over",
    "pop_female" = "% Women",
    "pop_black" = "% Black",
    "pop_density" = "Population Density",
    "(Intercept)" = "Constant"),
  groups = list("<b>Social Capital</b>" = 1,
                "<b>Mobility</b>" = 2,
                "<b>Politics & Governance</b>" = 3:4,
                "<b>Health Care</b>" = 5:7,
                "<b>Socioeconomics</b>" = 8:9,
                "<b>Demographics</b>" = 10:12),
  custom.gof.rows = list(
    "R2 / Conditional R2" = modellist %>%
      map(~performance::r2(.)[1]) %>% unlist(),
    "Adj. R2 / Marginal R2" = modellist %>%
      map(~performance::r2(.)[2]) %>% unlist()),
  include.fstat = TRUE,
  include.aic = FALSE, include.bic = FALSE, include.loglik = FALSE,
  include.rsqu = FALSE, include.adjr = FALSE, include.groups = FALSE,
  custom.note = "*** signifies statistical significance at p < 0.001, ** at p < 0.01, * at p < 0.05, and . at p < 0.10. Effect sizes have been standardized from 1 (min) to 10 (max) and can be compared among continuous variables. All variance inflation factor (VIF) scores were below 10, the threshold for problematic colinearity. Health quality, health care capacity, % women, % black, workplace mobility, college education rates, and family unity were broken into 8 quantiles to avoid colinearity problems. Income was broken into 6 quantiles. In GEE models, college education rates were removed due to colinearity.")

remove(modellist,
       m1a,m1b,m1c,
     m2a,m2b,m2c)


# Now repeat for the individual subtypes

modellist <- list(
  m3a,m3b,m3c,
  m4a,m4b,m4c,
     m5a,m5b,m5c,
     m6a,m6b,m6c)

texreg::htmlreg(
  modellist,
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  caption.above = TRUE,
  caption = "<b>Matched Balanced Panel OLS models of Excess Death Rates for US Counties by Time Period</b><br><i>Based on Balanced Panel Dataset of up to 286 counties, matched to test the effect of each type of social capital using Coarsened Exact Matching<br>observed between February 1 and September 26 (time periods = 6)</i><br><b>Using Alternative Social Capital Indices</b>",
  custom.header = list("Family Unity<br>(Matched)" = 1:3,
                       "Community Health<br>(Matched)" = 4:6,
                       "Institutional Health<br>(Matched)" = 7:9,
                       "Collective Efficacy<br>(Matched)" = 10:12),
  custom.model.names = rep(c("Fixed<br>Effects", "Random<br>Effects", "GEE<br>(Exchangeable)"), 4),
  file = "viz/table_10.html",
  custom.coef.map = list(
    # Social Capital Effects
    "socialcap" = "Social Capital",
    "sci_psu" = "Social Capital",
    "sci_usjec" = "Social Capital",
    # types
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    # other types
    "family_unity_usjec" = "Family Unity Index",
    "community_health_usjec" = "Community Health Index",
    "collective_efficacy_usjec" = "Collective Efficacy Index",
    "institutional_health_usjec" = "Institutional Health Index",
    # Mobility
    "workplaces" = "Average % Change in Mobility (Workplaces)",
    # Governance
    "democrat_2016" = "% Voted Democrat in 2016 Presidential Election",
    "employee_muni" = "Municipal Employees per 1000 residents",
    # Health Care Capacity
    "health_care_capacity" = "Health Care Capacity Index",
    "uninsured" = "% Uninsured",
    # Health Conditions
    "health_quality" = "Quality of Health Index",
    # Socioeconomics
    "median_household_income" = "Median Household Income",
    "some_college" = "% Some college or more",
    # Demographics
    "pop_age_65_plus" = "% Age 65 or over",
    "pop_female" = "% Women",
    "pop_black" = "% Black",
    "pop_density" = "Population Density",
    "(Intercept)" = "Constant"),
  groups = list("<b>Social Capital</b>" = 1:4,
                "<b>Mobility</b>" = 5,
                "<b>Politics & Governance</b>" = 6:7,
                "<b>Health Care</b>" = 8:10,
                "<b>Socioeconomics</b>" = 11:12,
                "<b>Demographics</b>" = 13:15),
  custom.gof.rows = list(
    "R2 / Conditional R2" = modellist %>%
      map(~performance::r2(.)[1]) %>% unlist(),
    "Adj. R2 / Marginal R2" = modellist %>%
      map(~performance::r2(.)[2]) %>% unlist()),
  include.fstat = TRUE,
  include.aic = FALSE, include.bic = FALSE, include.loglik = FALSE,
  include.rsqu = FALSE, include.adjr = FALSE, include.groups = FALSE,
  custom.note = "*** signifies statistical significance at p < 0.001, ** at p < 0.01, * at p < 0.05, and . at p < 0.10. Effect sizes have been standardized from 1 (min) to 10 (max) and can be compared among continuous variables. All variance inflation factor (VIF) scores were below 10, the threshold for problematic colinearity. Health quality, health care capacity, % women, % black, workplace mobility, college education rates, and family unity were broken into 8 quantiles to avoid colinearity problems. Income was broken into 6 quantiles. In GEE models, college education rates were removed due to colinearity.")


remove(modellist, 
       m3a,m3b,m3c,
       m4a,m4b,m4c,
     m5a,m5b,m5c,
     m6a,m6b,m6c)
```



```{r}


# Overall model with Random effects
#m9 <- dat %>%
#  filter(fips %in% balanced) %>%
#  plm(formula = myformula2, model = "within", index = c("fips", "date"), effect = "time")
#phtest(m9,m10) # the random effects was better

m1 <- dat %>%
  filter(fips %in% balanced) %>%
    mutate(excess_deaths_period_per_capita = log(excess_deaths_period_per_capita + 0.00000000001)) %>%
  plm(formula = myformula1, model = "random", index = c("fips", "date"), effect = "time")



## Overall ##
# Matched by social capital (Fixed Effects)
#m2 <- dat %>%
#  # Join in the coarsened exact matching weights to our dataset,
#  left_join(by = "fips", y = matched_any) %>%
#  # And filter just to the matched, balanced dataset with CEM weights
#  filter(!is.na(weights)) %>%
#  plm(formula = myformula1,
#      weights = weights, 
#      model = "within", index = c("fips", "date"), effect = "time") 

# Matched by social capital (Random effects)
m2 <- dat %>%
  left_join(by = "fips", y = matched_any) %>%
  filter(!is.na(weights)) %>%
    mutate(excess_deaths_period_per_capita = log(excess_deaths_period_per_capita + 0.00000000001)) %>%
  plm(formula = myformula1,
      weights = weights, 
      model = "random", index = c("fips", "date"), effect = "time") 
# Random effects fits better
#phtest(m2,m3)


## Overall ##
# Matched by social capital (Fixed Effects)
#m7 <- dat %>%
#  # Join in the coarsened exact matching weights to our dataset,
#  left_join(by = "fips", y = matched_ll) %>%
#  # And filter just to the matched, balanced dataset with CEM weights
#  filter(!is.na(weights)) %>%
#  plm(formula = myformula1,
#      weights = weights, 
#      model = "within", index = c("fips", "date"), effect = "time") 

# Matched by all social capital (Random effects)
m3 <- dat %>%
  left_join(by = "fips", y = matched_all) %>%
  filter(!is.na(weights)) %>%
    mutate(excess_deaths_period_per_capita = log(excess_deaths_period_per_capita + 0.00000000001)) %>%
  plm(formula = myformula1,
      weights = weights, 
      model = "random", index = c("fips", "date"), effect = "time") 
# Random effects fits better
#phtest(m2,m3)


## Subtypes ##
m4 <- dat %>%
  filter(fips %in% balanced) %>%
    mutate(excess_deaths_period_per_capita = log(excess_deaths_period_per_capita + 0.00000000001)) %>%
  plm(formula = myformula2, model = "random", index = c("fips", "date"), effect = "time")



# Matched by social capital (Fixed Effects)
#m5 <- dat %>%
#  # Join in the coarsened exact matching weights to our dataset,
#  left_join(by = "fips", y = matched_any) %>%
#  # And filter just to the matched, balanced dataset with CEM weights
#  filter(!is.na(weights)) %>%
#  plm(formula = myformula2,
#      weights = weights, 
#      model = "within", index = c("fips", "date"), effect = "time") 

# Matched by social capital (Random effects)
m5 <- dat %>%
  left_join(by = "fips", y = matched_any) %>%
  filter(!is.na(weights)) %>%
    mutate(excess_deaths_period_per_capita = log(excess_deaths_period_per_capita + 0.00000000001)) %>%
  plm(formula = myformula2,
      weights = weights, 
      model = "random", index = c("fips", "date"), effect = "time") 
# Random effects fits better


# Matched by social capital (Fixed Effects)
#m5 <- dat %>%
#  # Join in the coarsened exact matching weights to our dataset,
#  left_join(by = "fips", y = matched_all) %>%
#  # And filter just to the matched, balanced dataset with CEM weights
#  filter(!is.na(weights)) %>%
#  plm(formula = myformula2,
#      weights = weights, 
#      model = "within", index = c("fips", "date"), effect = "time") 

# Matched by all social capital (Random effects)
m6 <- dat %>%
  left_join(by = "fips", y = matched_all) %>%
  filter(!is.na(weights)) %>%
    mutate(excess_deaths_period_per_capita = log(excess_deaths_period_per_capita + 0.00000000001)) %>%
  plm(formula = myformula2,
      weights = weights, 
      model = "random", index = c("fips", "date"), effect = "time") 
```


## 4.3 Individual Models

```{r}

# Identify the balanced panel
balanced <- read_rds("dataset.rds") %>% 
  select(date, fips) %>%
  group_by(fips) %>%
  count() %>%
  filter(n == 6) %>%
  select(fips) %>% unlist()


# Import data!
dat <- read_rds("dataset.rds") %>%
  # Adjust for colinearity
  mutate_at(vars(health_care_capacity, health_quality,
                 democrat_2016, workplaces, pop_black, pop_female),
          funs(ntile(., 8) %>% scales::rescale(to = c(1,10)))) %>%
  mutate(pop_density = (pop_density^2) %>% scales::rescale(to = c(1,10)),
         median_household_income = ntile(median_household_income, 6) %>% scales::rescale(to = c(1,10))) %>%
  # Format all numeric variables
  # Rescale each from 1 to 10
  mutate_at(vars(workplaces:pop_density),
            funs(scales::rescale(., to = c(1,10))))  %>%
  mutate(date = factor(date)) %>%
  arrange(fips, date) %>%
  # Filter to balanced panel
  filter(fips %in% balanced) %>%
  as.data.frame() 

# Write a function to extract matched data
get_matched_data = function(matchmodel){
  dat %>%
    # Join into it
    left_join(by = c("fips"),
              # The coarsened exact matching weights
              y = dat %>%
                filter(date == "2020-04-25") %>%
                # From the specified matched sample
                mutate(weights = matchmodel$weights) %>%
                select(fips, weights)) %>%
    filter(weights > 0) %>% 
    mutate(label = matchmodel$formula[2] %>% as.character() %>% str_remove("_cat[(][)]")) %>%
    return()
}

# Get the matched datasets
datm <- read_rds("analysis/matched_models.rds") %>%
  map_dfr(~get_matched_data(.)) 

remove(dat)

# Get models of single time slices 
m1 <- datm %>%
  filter(label == "socialcap_cat") %>%
  split(.$date) %>%
  map(~lm(formula = excess_deaths_5yr_period_rate ~ 
       socialcap +
         democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni, data = ., weights = weights))

# Kyne & Aldrich, broken into three subindices
m2 <- datm %>%
  filter(label == "socialcap_cat") %>%
  split(.$date) %>%
  map(~lm(formula = excess_deaths_5yr_period_rate ~ 
       bonding + bridging + linking + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni, data = ., weights = weights))

# Kyne & Aldrich, broken into three subindices
m3 <- datm %>%
  filter(label == "all_cat") %>%
  split(.$date) %>%
  map(~lm(formula = excess_deaths_5yr_period_rate ~ 
       bonding + bridging + linking + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni, data = ., weights = weights))

# Kyne & Aldrich, broken into three subindices
m4 <- datm %>%
  filter(label == "bonding_cat") %>%
  split(.$date) %>%
  map(~lm(formula = excess_deaths_5yr_period_rate ~ 
       bonding + bridging + linking + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni, data = ., weights = weights))

# Kyne & Aldrich, broken into three subindices
m5 <- datm %>%
  filter(label == "bridging_cat") %>%
  split(.$date) %>%
  map(~lm(formula = excess_deaths_5yr_period_rate ~ 
       bonding + bridging + linking + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni, data = ., weights = weights))

# Kyne & Aldrich, broken into three subindices
m6 <- datm %>%
  filter(label == "linking_cat") %>%
  split(.$date) %>%
  map(~lm(formula = excess_deaths_5yr_period_rate ~ 
       bonding + bridging + linking + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni, data = ., weights = weights))

# Penn
m7 <- datm %>%
  filter(label == "sci_psu_cat") %>%
  split(.$date) %>%
  map(~lm(formula = excess_deaths_5yr_period_rate ~ 
       sci_psu + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni, data = ., weights = weights))

# Democrat is strongly colinear with everything remove it
m8 <- datm %>%
  filter(label == "sci_usjec_cat") %>%
  split(.$date) %>%
  map(~lm(formula = excess_deaths_5yr_period_rate ~ 
       sci_usjec + #democrat_2016 + 
         workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni, data = ., weights = weights))

# USJEC
# USJEC subindices
m9 <- datm %>%
  filter(label == "family_unity_usjec_cat") %>%
  # In order to avoid colinearity by health quality ,race, and income,
  # must break this index into 10 pieces
  mutate(family_unity_usjec = ntile(family_unity_usjec, 10) %>% 
           scales::rescale(to = c(1,10))) %>%
  split(.$date) %>%
  map(~lm(formula = excess_deaths_5yr_period_rate ~ 
       family_unity_usjec + community_health_usjec + 
        institutional_health_usjec + collective_efficacy_usjec +
       #democrat_2016 + 
         workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni, data = ., weights = weights))


m10 <- datm %>%
  filter(label == "community_health_usjec_cat") %>%
  # In order to avoid colinearity by health quality ,race, and income,
  # must break this index into 10 pieces
  mutate(family_unity_usjec = ntile(family_unity_usjec, 10) %>% 
           scales::rescale(to = c(1,10))) %>%
  split(.$date) %>%
  map(~lm(formula = excess_deaths_5yr_period_rate ~ 
       family_unity_usjec + community_health_usjec + 
        institutional_health_usjec + collective_efficacy_usjec +
       #democrat_2016 + 
         workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni, data = ., weights = weights))

m11 <- datm %>%
  filter(label == "institutional_health_usjec_cat") %>%
  # In order to avoid colinearity by health quality ,race, and income,
  # must break this index into 10 pieces
  mutate(family_unity_usjec = ntile(family_unity_usjec, 10) %>% 
           scales::rescale(to = c(1,10))) %>%
  split(.$date) %>%
  map(~lm(formula = excess_deaths_5yr_period_rate ~ 
       family_unity_usjec + community_health_usjec + 
        institutional_health_usjec + collective_efficacy_usjec +
       #democrat_2016 + 
         workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni, data = ., weights = weights))

m12 <- datm %>%
  filter(label == "collective_efficacy_usjec_cat") %>%
  # In order to avoid colinearity by health quality ,race, and income,
  # must break this index into 10 pieces
  mutate(family_unity_usjec = ntile(family_unity_usjec, 10) %>% 
           scales::rescale(to = c(1,10))) %>%
  split(.$date) %>%
  map(~lm(formula = excess_deaths_5yr_period_rate ~ 
       family_unity_usjec + community_health_usjec + 
        institutional_health_usjec + collective_efficacy_usjec +
       #democrat_2016 + 
         workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni, data = ., weights = weights))

# Calculate the highest VIF among them; all are below 10, some around 7. Pretty okay.
#for example:
#m6 %>%
#  map(~car::vif(.) %>% max())
```

### Kyne & Aldrich

```{r}

texreg::htmlreg(
  c(m1,m2),
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  caption.above = TRUE,
  caption = "<b>Matched OLS models of Excess Death Rates for US Counties by Time Period</b><br><i>Based on Matched Samples of counties observed between February 1 and September 26</i><br>Using Kyne & Aldrich Social Capital Indices",
  custom.header = list("Overall Social Capital<br>(Matched Overall)" = 1:6,
                       "Social Capital Subtypes<br>(Matched Overall)" = 7:12),
  custom.model.names = rep(c("2/1 - 4/25", "4/26 - 5/16", "5/17 - 5/23", 
                         "5/24 - 8/22", "8/23 - 8/29", "8/30 - 9/26"), 2),
  file = "viz/table_11.html",
  custom.coef.map = list(
    # Social Capital Effects
    "socialcap" = "Social Capital",
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    # Mobility
    "workplaces" = "Average % Change in Mobility (Workplaces)",
    # Governance
        "democrat_2016" = "% Voted Democrat in 2016 Presidential Election",
    "employee_muni" = "Municipal Employees per 1000 residents",
    # Health Care Capacity
    "health_care_capacity" = "Health Care Capacity Index",
    "uninsured" = "% Uninsured",
    # Health Conditions
    "health_quality" = "Quality of Health Index",
    # Socioeconomics
        "median_household_income" = "Median Household Income",
    "some_college" = "% Some college or more",
    # Demographics
  "pop_age_65_plus" = "% Age 65 or over",
    "pop_female" = "% Women",
    "pop_black" = "% Black",
    "pop_density" = "Population Density",
    "(Intercept)" = "Constant"),
  groups = list("<b>Social Capital</b>" = 1:4,
                "<b>Mobility</b>" = 5,
                "<b>Politics & Governance</b>" = 6:7,
                "<b>Health Care</b>" = 8:10,
                "<b>Socioeconomics</b>" = 11:12,
                "<b>Demographics</b>" = 13:17),
  include.fstat = TRUE,
  custom.note = "*** signifies statistical significance at p < 0.001, ** at p < 0.01, * at p < 0.05, and . at p < 0.10. Effect sizes have been standardized from 1 (min) to 10 (max) and can be compared among continuous variables. All variance inflation factor (VIF) scores were below 10. To avoid colinearity, health care capacity, health quality, workplace mobility, and the percent of democrats, women, and African Americans were cut into 8 quantiles, population density was squared, and income was cut into 6 quantiles.")


texreg::htmlreg(
  c(m3),
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  caption.above = TRUE,
  caption = "<b>Matched OLS models of Excess Death Rates for US Counties by Time Period</b><br><i>Based on Matched Samples of counties observed between February 1 and September 26</i><br>Using Kyne & Aldrich Social Capital Indices",
  custom.header = list("Social Capital Subtypes<br>(Matched for All)" = 1:6),
  custom.model.names = rep(c("2/1 - 4/25", "4/26 - 5/16", "5/17 - 5/23", 
                         "5/24 - 8/22", "8/23 - 8/29", "8/30 - 9/26"), 1),
  file = "viz/table_12.html",
  custom.coef.map = list(
    # Social Capital Effects
    "socialcap" = "Social Capital",
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    # Mobility
    "workplaces" = "Average % Change in Mobility (Workplaces)",
    # Governance
        "democrat_2016" = "% Voted Democrat in 2016 Presidential Election",
    "employee_muni" = "Municipal Employees per 1000 residents",
    # Health Care Capacity
    "health_care_capacity" = "Health Care Capacity Index",
    "uninsured" = "% Uninsured",
    # Health Conditions
    "health_quality" = "Quality of Health Index",
    # Socioeconomics
        "median_household_income" = "Median Household Income",
    "some_college" = "% Some college or more",
    # Demographics
  "pop_age_65_plus" = "% Age 65 or over",
    "pop_female" = "% Women",
    "pop_black" = "% Black",
    "pop_density" = "Population Density",
    "(Intercept)" = "Constant"),
  groups = list("<b>Social Capital</b>" = 1:3,
                "<b>Mobility</b>" = 4,
                "<b>Politics & Governance</b>" = 5:6,
                "<b>Health Care</b>" = 7:9,
                "<b>Socioeconomics</b>" = 10:11,
                "<b>Demographics</b>" = 12:16),
  include.fstat = TRUE,
  custom.note = "*** signifies statistical significance at p < 0.001, ** at p < 0.01, * at p < 0.05, and . at p < 0.10. Effect sizes have been standardized from 1 (min) to 10 (max) and can be compared among continuous variables. All variance inflation factor (VIF) scores were below 10. To avoid colinearity, health care capacity, health quality, workplace mobility, and the percent of democrats, women, and African Americans were cut into 8 quantiles, population density was squared, and income was cut into 6 quantiles.")



texreg::htmlreg(
  c(m4,m5,m6),
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  caption.above = TRUE,
  caption = "<b>Matched OLS models of Excess Death Rates for US Counties by Time Period</b><br><i>Based on Matched Samples of counties observed between February 1 and September 26</i><br>Using Kyne & Aldrich Social Capital Indices",
  custom.header = list("Bonding Social Capital<br>(Matched)" = 1:6,
                       "Bridging Social Capital<br>(Matched)" = 7:12,
                       "Linking Social Capital<br>(Matched)" = 13:18),
  custom.model.names = rep(c("2/1 - 4/25", "4/26 - 5/16", "5/17 - 5/23", 
                         "5/24 - 8/22", "8/23 - 8/29", "8/30 - 9/26"), 3),
  file = "viz/table_13.html",
  custom.coef.map = list(
    # Social Capital Effects
    "socialcap" = "Social Capital",
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    # Mobility
    "workplaces" = "Average % Change in Mobility (Workplaces)",
    # Governance
        "democrat_2016" = "% Voted Democrat in 2016 Presidential Election",
    "employee_muni" = "Municipal Employees per 1000 residents",
    # Health Care Capacity
    "health_care_capacity" = "Health Care Capacity Index",
    "uninsured" = "% Uninsured",
    # Health Conditions
    "health_quality" = "Quality of Health Index",
    # Socioeconomics
        "median_household_income" = "Median Household Income",
    "some_college" = "% Some college or more",
    # Demographics
  "pop_age_65_plus" = "% Age 65 or over",
    "pop_female" = "% Women",
    "pop_black" = "% Black",
    "pop_density" = "Population Density",
    "(Intercept)" = "Constant"),
  groups = list("<b>Social Capital</b>" = 1:3,
                "<b>Mobility</b>" = 4,
                "<b>Politics & Governance</b>" = 5:6,
                "<b>Health Care</b>" = 7:9,
                "<b>Socioeconomics</b>" = 10:11,
                "<b>Demographics</b>" = 12:16),
  include.fstat = TRUE,
  custom.note = "*** signifies statistical significance at p < 0.001, ** at p < 0.01, * at p < 0.05, and . at p < 0.10. Effect sizes have been standardized from 1 (min) to 10 (max) and can be compared among continuous variables. All variance inflation factor (VIF) scores were below 10. To avoid colinearity, health care capacity, health quality, workplace mobility, and the percent of democrats, women, and African Americans were cut into 8 quantiles, population density was squared, and income was cut into 6 quantiles.")

# For USJEC subindex models, family unity was broken into 10 quantiles to avoid colinearity problems with income, health, and race covariates. 
```

### Alternatives

```{r}
texreg::htmlreg(
  c(m7,m8),
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  caption.above = TRUE,
  caption = "<b>Matched OLS models of Excess Death Rates for US Counties by Time Period</b><br><i>Based on Matched Samples of counties observed between February 1 and September 26</i><br>Using Alternative Social Capital Indices",
  custom.header = list("Rupasingha et al.<br>Social Capital<br>(Matched)" = 1:6,
                       "USJEC<br>Social Capital<br>(Matched)" = 7:12),
  custom.model.names = rep(c("2/1 - 4/25", "4/26 - 5/16", "5/17 - 5/23", 
                         "5/24 - 8/22", "8/23 - 8/29", "8/30 - 9/26"), 2),
  file = "viz/table_14.html",
  custom.coef.map = list(
    # Social Capital Effects
    "socialcap" = "Social Capital",
    "sci_psu" = "Social Capital",
    "sci_usjec" = "Social Capital",
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    # Mobility
    "workplaces" = "Average % Change in Mobility (Workplaces)",
    # Governance
        "democrat_2016" = "% Voted Democrat in 2016 Presidential Election",
    "employee_muni" = "Municipal Employees per 1000 residents",
    # Health Care Capacity
    "health_care_capacity" = "Health Care Capacity Index",
    "uninsured" = "% Uninsured",
    # Health Conditions
    "health_quality" = "Quality of Health Index",
    # Socioeconomics
        "median_household_income" = "Median Household Income",
    "some_college" = "% Some college or more",
    # Demographics
  "pop_age_65_plus" = "% Age 65 or over",
    "pop_female" = "% Women",
    "pop_black" = "% Black",
    "pop_density" = "Population Density",
    "(Intercept)" = "Constant"),
  groups = list("<b>Social Capital</b>" = 1,
                "<b>Mobility</b>" = 2,
                "<b>Politics & Governance</b>" = 3:4,
                "<b>Health Care</b>" = 5:7,
                "<b>Socioeconomics</b>" = 8:9,
                "<b>Demographics</b>" = 10:13),
  include.fstat = TRUE,
  custom.note = "*** signifies statistical significance at p < 0.001, ** at p < 0.01, * at p < 0.05, and . at p < 0.10. Effect sizes have been standardized from 1 (min) to 10 (max) and can be compared among continuous variables. All variance inflation factor (VIF) scores were below 10. To avoid colinearity, health care capacity, health quality, workplace mobility, and the percent of democrats, women, and African Americans were cut into 8 quantiles, population density was squared, and income was cut into 6 quantiles.")



texreg::htmlreg(
  c(m9,m10),
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  caption.above = TRUE,
  caption = "<b>Matched OLS models of Excess Death Rates for US Counties by Time Period</b><br><i>Based on Matched Samples of counties observed between February 1 and September 26</i><br>Using Alternative Social Capital Indices",
  custom.header = list("Family Unity<br>(Matched)" = 1:6,
                       "Community Health<br>(Matched)" = 7:12),
  custom.model.names = rep(c("2/1 - 4/25", "4/26 - 5/16", "5/17 - 5/23", 
                         "5/24 - 8/22", "8/23 - 8/29", "8/30 - 9/26"), 2),
  file = "viz/table_15.html",
  custom.coef.map = list(
    # Social Capital Effects
    # other types
    "family_unity_usjec" = "Family Unity Index",
    "community_health_usjec" = "Community Health Index",
    "collective_efficacy_usjec" = "Collective Efficacy Index",
    "institutional_health_usjec" = "Institutional Health Index",
    # Mobility
    "workplaces" = "Average % Change in Mobility (Workplaces)",
    # Governance
        "democrat_2016" = "% Voted Democrat in 2016 Presidential Election",
    "employee_muni" = "Municipal Employees per 1000 residents",
    # Health Care Capacity
    "health_care_capacity" = "Health Care Capacity Index",
    "uninsured" = "% Uninsured",
    # Health Conditions
    "health_quality" = "Quality of Health Index",
    # Socioeconomics
        "median_household_income" = "Median Household Income",
    "some_college" = "% Some college or more",
    # Demographics
  "pop_age_65_plus" = "% Age 65 or over",
    "pop_female" = "% Women",
    "pop_black" = "% Black",
    "pop_density" = "Population Density",
    "(Intercept)" = "Constant"),
  groups = list("<b>Social Capital</b>" = 1:4,
                "<b>Mobility</b>" = 5,
                "<b>Politics & Governance</b>" = 6,
                "<b>Health Care</b>" = 7:9,
                "<b>Socioeconomics</b>" = 10:11,
                "<b>Demographics</b>" = 12:16),
  include.fstat = TRUE,
  custom.note = "*** signifies statistical significance at p < 0.001, ** at p < 0.01, * at p < 0.05, and . at p < 0.10. Effect sizes have been standardized from 1 (min) to 10 (max) and can be compared among continuous variables. All variance inflation factor (VIF) scores were below 10. To avoid colinearity, health care capacity, health quality, workplace mobility, and the percent of democrats, women, and African Americans were cut into 8 quantiles, population density was squared, and income was cut into 6 quantiles.")


texreg::htmlreg(
  c(m11,m12),
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  caption.above = TRUE,
  caption = "<b>Matched OLS models of Excess Death Rates for US Counties by Time Period</b><br><i>Based on Matched Samples of counties observed between February 1 and September 26</i><br>Using Alternative Social Capital Indices",
  custom.header = list("Institutional Health<br>(Matched)" = 1:6,
                       "Collective Efficacy<br>(Matched)" = 7:12),
  custom.model.names = rep(c("2/1 - 4/25", "4/26 - 5/16", "5/17 - 5/23", 
                         "5/24 - 8/22", "8/23 - 8/29", "8/30 - 9/26"), 2),
  file = "viz/table_16.html",
  custom.coef.map = list(
    # Social Capital Effects
    # other types
    "family_unity_usjec" = "Family Unity Index",
    "community_health_usjec" = "Community Health Index",
    "collective_efficacy_usjec" = "Collective Efficacy Index",
    "institutional_health_usjec" = "Institutional Health Index",
    # Mobility
    "workplaces" = "Average % Change in Mobility (Workplaces)",
    # Governance
        "democrat_2016" = "% Voted Democrat in 2016 Presidential Election",
    "employee_muni" = "Municipal Employees per 1000 residents",
    # Health Care Capacity
    "health_care_capacity" = "Health Care Capacity Index",
    "uninsured" = "% Uninsured",
    # Health Conditions
    "health_quality" = "Quality of Health Index",
    # Socioeconomics
        "median_household_income" = "Median Household Income",
    "some_college" = "% Some college or more",
    # Demographics
  "pop_age_65_plus" = "% Age 65 or over",
    "pop_female" = "% Women",
    "pop_black" = "% Black",
    "pop_density" = "Population Density",
    "(Intercept)" = "Constant"),
  groups = list("<b>Social Capital</b>" = 1:4,
                "<b>Mobility</b>" = 5,
                "<b>Politics & Governance</b>" = 6,
                "<b>Health Care</b>" = 7:9,
                "<b>Socioeconomics</b>" = 10:11,
                "<b>Demographics</b>" = 12:16),
  include.fstat = TRUE,
  custom.note = "*** signifies statistical significance at p < 0.001, ** at p < 0.01, * at p < 0.05, and . at p < 0.10. Effect sizes have been standardized from 1 (min) to 10 (max) and can be compared among continuous variables. All variance inflation factor (VIF) scores were below 10. To avoid colinearity, health care capacity, health quality, workplace mobility, and the percent of democrats, women, and African Americans were cut into 8 quantiles, population density was squared, and income was cut into 6 quantiles.")

```

# 5. Visualization

### Correlations

```{r}
# Import data as unbalanced panel
datun <- read_rds("dataset.rds") %>%
  # Format all numeric variables
  # Rescale each from 1 to 10
  mutate_at(vars(workplaces:pop_density),
            funs(scale(.) %>% as.numeric()))  %>%
  mutate(date = factor(date)) %>%
  arrange(fips, date)


# Identify the balanced panel
balanced <- read_rds("dataset.rds") %>% 
  select(date, fips) %>%
  group_by(fips) %>%
  count() %>%
  filter(n == 6) %>%
  select(fips) %>% unlist()


# Filter to balanced panel
dat <- datun %>%
  filter(fips %in% balanced) %>%
  as.data.frame() 

# Write a function to extract matched data
get_matched_data = function(matchmodel){
  dat %>%
    # Join into it
    left_join(by = c("fips"),
              # The coarsened exact matching weights
              y = dat %>%
                filter(date == "2020-04-25") %>%
                # From the specified matched sample
                mutate(weights = matchmodel$weights) %>%
                select(fips, weights)) %>%
    filter(weights > 0) %>% 
    mutate(label = matchmodel$formula[2] %>% as.character() %>% str_remove("_cat[(][)]")) %>%
    return()
}

# Get the matched datasets
datm <- read_rds("analysis/matched_models.rds") %>%
  map_dfr(~get_matched_data(.)) 


out <- bind_rows(
  datm %>%
    # Zoom into just main cases
    select(contains("period"),socialcap, bonding, bridging, linking,
           sci_psu, sci_usjec, 
            "collective_efficacy_usjec","community_health_usjec",
           "family_unity_usjec", "institutional_health_usjec", weights) %>%
    pivot_longer(
      cols = c(socialcap, bonding, bridging, linking,
               sci_psu, sci_usjec, contains("usjec")),
      names_to = "measure",
      values_to = "value") %>%
    mutate(type = "Matched Panel"),
  
  # Get the balanced panel
  dat %>%
    # Zoom into just main cases
    select(contains("period"),socialcap, bonding, bridging, linking,
           sci_psu, sci_usjec, 
            "collective_efficacy_usjec","community_health_usjec",
           "family_unity_usjec", "institutional_health_usjec") %>%
    pivot_longer(
      cols = c(socialcap, bonding, bridging, linking,
               sci_psu, sci_usjec, 
            "collective_efficacy_usjec","community_health_usjec",
           "family_unity_usjec", "institutional_health_usjec"),
      names_to = "measure",
      values_to = "value") %>%
    mutate(type = "Balanced Panel"),
  
  datun %>%
     # Zoom into just main cases
    select(contains("period"),socialcap, bonding, bridging, linking,
           sci_psu, sci_usjec, 
            "collective_efficacy_usjec","community_health_usjec",
           "family_unity_usjec", "institutional_health_usjec") %>%
    pivot_longer(
      cols = c(socialcap, bonding, bridging, linking,
               sci_psu, sci_usjec, contains("usjec")),
      names_to = "measure",
      values_to = "value") %>%
    mutate(type = "Unbalanced Panel"))

remove(dat, datm, datun)

result <- out %>%
  pivot_longer(cols = contains("period_rate"),
               names_to = "version", values_to = "outcome") %>%
  # Also, we need to apply the weights for the matched data
  mutate_at(vars(outcome, value), funs(if_else(type == "Matched Panel", .*weights, .))) %>%
  group_by(type, measure, version) %>%
  summarize(cor = cor(outcome, value, use = "pairwise.complete.obs")) %>%
  ungroup() %>%
  mutate(type = factor(type, levels = c("Unbalanced Panel", 
                                        "Balanced Panel", "Matched Panel")),
         measure = measure %>% recode_factor(
           "socialcap" = "Social Capital\n(Kyne & Aldrich)",
           "sci_psu" = "Social Capital\n(Rupasingha et al.)",
           "sci_usjec" = "Social Capital\n(USJEC)",
           "bonding" = "Bonding Social Capital\n(Kyne & Aldrich)",
           "bridging" = "Bridging Social Capital\n(Kyne & Aldrich)",
           "linking" = "Linking Social Capital\n(Kyne & Aldrich)",
           "family_unity_usjec" = "Family Unity\n(USJEC)",
           "community_health_usjec" = "Community Health\n(USJEC)",
           "institutional_health_usjec" = "Institutional Health\n(USJEC)",
           "collective_efficacy_usjec" = "Collective Efficacy\n(USJEC)") %>%
           factor(levels = levels(.) %>% rev()),
         version = version %>% recode_factor(
           "excess_deaths_1yr_period_rate" = "1-year\nrange\n(2019)",
           "excess_deaths_3yr_period_rate" = "3-year\naverage\n(2017-2019)",
           "excess_deaths_5yr_period_rate" = "5-year\naverage\n(2015-2019)")) 

result %>%
  ggplot(mapping = aes(x = version, y = measure, fill = cor, label = round(cor, 2))) +
  geom_tile(color = "white", size = 1.15) +
  geom_text() +
  scale_fill_gradient2(low = "#DC267F", high = "#648FFF", midpoint = 0, mid = "white") +
  facet_grid(~type, scales = "free") +
  theme_classic(base_size = 14) +
  theme(legend.position = "bottom",
        panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.5, "cm")) +
  guides(fill = guide_colorsteps(barwidth = 10, barheight = 0.5)) +
  labs(x = "Excess Deaths per 100,000 residents\nby time range of comparison",
       y = "Type of Social Capital", fill = "Correlation (-1 to 1)\n     (Pearons's r)") +
  ggsave("viz/figure_correlations.png", dpi = 500, width = 11, height = 6.5)
```

```{r}
rm(list = ls())

```


### Validate Excess Deaths

```{r}

# Import data as unbalanced panel
datun <- read_rds("dataset.rds") %>%
  # Format all numeric variables
  # Rescale each from 1 to 10
  mutate_at(vars(workplaces:pop_density),
            funs(scale(.) %>% as.numeric()))  %>%
  mutate(date = factor(date)) %>%
  arrange(fips, date)


# Identify the balanced panel
balanced <- read_rds("dataset.rds") %>% 
  select(date, fips) %>%
  group_by(fips) %>%
  count() %>%
  filter(n == 6) %>%
  select(fips) %>% unlist()


# Filter to balanced panel
dat <- datun %>%
  filter(fips %in% balanced) %>%
  as.data.frame() 

# Write a function to extract matched data
get_matched_data = function(matchmodel){
  dat %>%
    # Join into it
    left_join(by = c("fips"),
              # The coarsened exact matching weights
              y = dat %>%
                filter(date == "2020-04-25") %>%
                # From the specified matched sample
                mutate(weights = matchmodel$weights) %>%
                select(fips, weights)) %>%
    filter(weights > 0) %>% 
    mutate(label = matchmodel$formula[2] %>% as.character() %>% str_remove("_cat[(][)]")) %>%
    return()
}

# Get the matched datasets
datm <- read_rds("analysis/matched_models.rds") %>%
  map_dfr(~get_matched_data(.)) 



out <- bind_rows(
  datm %>%
    # Zoom into just main cases
    select(contains("period"),socialcap, bonding, bridging, linking,
           sci_psu, sci_usjec, 
            "collective_efficacy_usjec","community_health_usjec",
           "family_unity_usjec", "institutional_health_usjec", weights) %>%
    mutate(type = "Matched Panel"),
  
  # Get the balanced panel
  dat %>%
    # Zoom into just main cases
    select(contains("period"),socialcap, bonding, bridging, linking,
           sci_psu, sci_usjec, 
            "collective_efficacy_usjec","community_health_usjec",
           "family_unity_usjec", "institutional_health_usjec") %>%
    mutate(type = "Balanced Panel"),
  
  datun %>%
     # Zoom into just main cases
    select(contains("period"),socialcap, bonding, bridging, linking,
           sci_psu, sci_usjec, 
            "collective_efficacy_usjec","community_health_usjec",
           "family_unity_usjec", "institutional_health_usjec") %>%
    mutate(type = "Unbalanced Panel"))

out %>%
  # Also, we need to apply the weights for the matched data
  mutate(excess_deaths_1yr_period_rate = if_else(type == "Matched Panel", excess_deaths_1yr_period_rate*weights, excess_deaths_1yr_period_rate),
         excess_deaths_3yr_period_rate = if_else(type == "Matched Panel", excess_deaths_3yr_period_rate*weights, excess_deaths_3yr_period_rate),
         
         excess_deaths_5yr_period_rate = if_else(type == "Matched Panel", excess_deaths_5yr_period_rate*weights, excess_deaths_5yr_period_rate)) %>%
  # Now for each type of dataset,
  # Calculate the correlation between each measure
  group_by(type) %>%
  summarize(cor_1yr_3yr = cor(excess_deaths_1yr_period_rate, excess_deaths_3yr_period_rate,
                              use = "pairwise.complete.obs"),
            cor_3yr_5yr = cor(excess_deaths_3yr_period_rate, excess_deaths_5yr_period_rate,
                              use = "pairwise.complete.obs"),
            cor_1yr_5yr = cor(excess_deaths_1yr_period_rate, excess_deaths_5yr_period_rate,
                              use = "pairwise.complete.obs")) %>%
  ungroup() %>%
  pivot_longer(cols = c(cor_1yr_3yr,cor_3yr_5yr,cor_1yr_5yr), 
               names_to = "measure", values_to = "cor") %>%
  mutate(measure = measure %>% recode_factor(
    "cor_3yr_5yr" = "3-year vs. 5-year",
    "cor_1yr_5yr" = "1-year vs. 5-year",
    "cor_1yr_3yr" = "1-year vs. 3-year")) %>%
  ggplot(mapping = aes(x = type, y = measure, label = round(cor, 3))) +
  geom_tile(color = "lightgrey", size = 1.15, fill = "white") +
  geom_text() +
  scale_fill_gradient2(low = "#DC267F", high = "#648FFF", midpoint = 0, mid = "white") +
  scale_x_discrete(position = "top") +
  theme_classic(base_size = 14) +
  theme(legend.position = "bottom",
        panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.5, "cm"),
        plot.caption = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5)) +
  guides(fill = guide_colorsteps(barwidth = 10, barheight = 0.5)) +
  labs(subtitle = "Correlation between Different Measures\nof Excess Deaths per 100,000 residents",
       y = "Time Range\nof Comparison",
       x = "Sample", caption = "Correlation (-1 to 1) (Pearson's r)\nNote: Time Range refers to comparison point against 2020 deaths.") +
  ggsave("viz/figure_outcome_correlation.png", dpi = 500, width = 7, height = 3)
```


### Descriptive Statistics

```{r}


# Import data as unbalanced panel
datun <- read_rds("dataset.rds") %>%
  select(fips, date)


# Identify the balanced panel
balanced <- read_rds("dataset.rds") %>% 
  select(date, fips) %>%
  group_by(fips) %>%
  count() %>%
  filter(n == 6) %>%
  select(fips) %>% unlist()


# Filter to balanced panel
dat <- datun %>%
  filter(fips %in% balanced) %>%
  as.data.frame() 

# Write a function to extract matched data
get_matched_data = function(matchmodel){
  dat %>%
    # Join into it
    left_join(by = c("fips"),
              # The coarsened exact matching weights
              y = dat %>%
                filter(date == "2020-04-25") %>%
                # From the specified matched sample
                mutate(weights = matchmodel$weights) %>%
                select(fips, weights)) %>%
    filter(weights > 0) %>% 
    mutate(label = matchmodel$formula[2] %>% as.character() %>% str_remove("_cat[(][)]")) %>%
    return()
}

# Get the matched datasets
datm <- read_rds("analysis/matched_models.rds") %>%
  map_dfr(~get_matched_data(.)) 


# Identify main variables of interest
myvars <- c("excess_deaths_1yr_period_rate",
            "excess_deaths_3yr_period_rate", 
            "excess_deaths_5yr_period_rate", 
            "socialcap","bonding", "bridging","linking",
            "sci_psu",
            "sci_usjec",
          "family_unity_usjec",
          "community_health_usjec",
          "collective_efficacy_usjec",
          "institutional_health_usjec",
          "democrat_2016", "employee_muni", 
          "workplaces","pop_density",
          "health_care_capacity", "uninsured", "health_quality",
          "pop_age_65_plus", "pop_female", "pop_black",
          "median_household_income", "some_college")
  

out <- bind_rows(
  # It doesn't make sense to include the matched ones,
  # since each of the 12 should have their own descriptives
  #datm %>%
  #  mutate(type = "Matched Panel"),
  
  # Get the balanced panel
  dat %>%
    mutate(type = "Balanced Panel"),
  
  datun %>%
    mutate(type = "Unbalanced Panel")) %>%
  # Join in unimputed data
  left_join(by = c("fips", "date"),
            y = read_rds("dataset_unimputed.rds") %>%
              select(fips, date, myvars))

remove(datm, dat, datun)

# Now, let's calculate some descriptive statistics for these variables

viz <- out %>%
  # Multiply percentages to 100
  mutate_at(vars(pop_black, pop_age_65_plus, pop_female,
                 some_college, democrat_2016,uninsured),
            funs(. * 100)) %>%
  mutate(median_household_income = median_household_income / 1000,
            pop_density = pop_density / 100) %>%
  pivot_longer(
    cols = c(myvars), names_to = "variable", values_to = "value") %>%
  group_by(type, variable) %>%
  summarize(mean = mean(value, na.rm = TRUE),
            sd = sd(value, na.rm = TRUE),
            median = median(value, na.rm = TRUE),
            min = min(value, na.rm = TRUE),
            max = max(value, na.rm = TRUE),
            missing = sum(is.na(value)) / n() * 100) %>%
  mutate_at(vars(mean,sd,median, min, max, missing),
            funs(round(., 2))) %>%
  mutate(variable = variable %>% recode_factor(
    "excess_deaths_1yr_period_rate" = "Excess Deaths (1-year)\nper 100,000 residents",
    "excess_deaths_3yr_period_rate" = "Excess Deaths (3-year)\nper 100,000 residents",
    "excess_deaths_5yr_period_rate" = "Excess Deaths (5-year)\nper 100,000 residents",
    # Social Capital Effects
    "socialcap" = "Social Capital",
    "sci_psu" = "Social Capital\n(Rupasingha et al.)",
    "sci_usjec" = "Social Capital\n(USJEC)",
    # types
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    # other types
    "family_unity_usjec" = "Family Unity",
    "community_health_usjec" = "Community Health",
    "collective_efficacy_usjec" = "Collective Efficacy",
    "institutional_health_usjec" = "Institutional Health",
    # Mobility
    "workplaces" = "Average % Change\nin Mobility (Workplaces)",
    # Governance
    "democrat_2016" = "% Voted Democrat in\n2016 Presidential Election",
    "employee_muni" = "Municipal Employees\nper 1000 residents",
    # Health Care Capacity
    "health_care_capacity" = "Health Care\nCapacity Index",
    "uninsured" = "% Uninsured",
    # Health Conditions
    "health_quality" = "Quality of Health",
    # Socioeconomics
    "median_household_income" = "Median Household\nIncome (thousands)",
    "some_college" = "% Some college or more",
    # Demographics
    "pop_age_65_plus" = "% Age 65 or over",
    "pop_female" = "% Women",
    "pop_black" = "% Black",
    "pop_density" = "Pop. Density\n(hundreds per sq.km.)") %>%
      factor(levels = levels(.) %>% rev())) %>%
  pivot_longer(cols = -c(type, variable), names_to = "stat", values_to = "value") %>%
  mutate(stat = stat %>% recode_factor(
    "mean" = "Mean",
    "median" = "Median",
    "sd" = "Std.\nDev.",
    "min" = "Min",
    "max" = "Max",
    "missing" = "(%)\nMissing"),
    type = factor(type, levels = c("Unbalanced Panel", "Balanced Panel")))


viz %>%
  ggplot(mapping = aes(x = stat, y = variable, label = value)) +
  geom_tile(color = "lightgrey", size = 1.15, fill = "white") +
  geom_text() +
  scale_fill_gradient2(low = "#DC267F", high = "#648FFF", midpoint = 0, mid = "white") +
  scale_x_discrete(position = "top") +
  facet_grid(~type, scales = "free") +
  theme_classic(base_size = 14) +
  theme(legend.position = "bottom",
        panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.5, "cm"),
        plot.caption = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5)) +
  guides(fill = guide_colorsteps(barwidth = 10, barheight = 0.5)) +
  labs(subtitle = "Descriptive Statistics of Outcomes and Covariates",
       y = NULL,
       x = NULL) +
  ggsave("viz/figure_descriptives.png", dpi = 500, width = 10, height = 11)


```

## Linking Simulation

```{r}

# Identify main variables of interest
myvars <- c("excess_deaths_1yr_period_rate",
            "excess_deaths_3yr_period_rate", 
            "excess_deaths_5yr_period_rate", 
            "socialcap","bonding", "bridging","linking",
            "sci_psu",
            "sci_usjec",
          "family_unity_usjec",
          "community_health_usjec",
          "collective_efficacy_usjec",
          "institutional_health_usjec",
          "democrat_2016", "employee_muni", 
          "workplaces","pop_density",
          "health_care_capacity", "uninsured", "health_quality",
          "pop_age_65_plus", "pop_female", "pop_black",
          "median_household_income", "some_college")
  

# Import data as unbalanced panel
datun <- read_rds("dataset.rds") %>%
  # Format all numeric variables
  # Rescale each as Z-scores
  mutate_at(vars(workplaces:pop_density),
            funs(scale(.) %>% as.numeric()))  %>%
  mutate(date = factor(date)) %>%
  arrange(fips, date)


# Identify the balanced panel
balanced <- read_rds("dataset.rds") %>% 
  select(date, fips) %>%
  group_by(fips) %>%
  count() %>%
  filter(n == 6) %>%
  select(fips) %>% unlist()


# Filter to balanced panel
dat <- datun %>%
  filter(fips %in% balanced) %>%
  as.data.frame() 

# Write a function to extract matched data
get_matched_data = function(matchmodel){
  dat %>%
    # Join into it
    left_join(by = c("fips"),
              # The coarsened exact matching weights
              y = dat %>%
                filter(date == "2020-04-25") %>%
                # From the specified matched sample
                mutate(weights = matchmodel$weights) %>%
                select(fips, weights)) %>%
    filter(weights > 0) %>% 
    mutate(label = matchmodel$formula[2] %>% as.character() %>% str_remove("_cat[(][)]")) %>%
    return()
}

# Get the matched datasets
datm <- read_rds("analysis/matched_models.rds") %>%
  map_dfr(~get_matched_data(.)) 


out <- bind_rows(
  datm %>%
    select(fips, date, weights, myvars, label) %>%
    mutate(type = "Matched Panel"),
  
  # Get the balanced panel
  dat %>%
    # Zoom into just main cases
    select(fips, date, myvars) %>%
    mutate(type = "Balanced Panel"),
  
  datun %>%
     # Zoom into just main cases
    select(fips, date, myvars) %>%
    mutate(type = "Unbalanced Panel"))

remove(dat, datm, datun)

mydates <- out$date %>% unique() %>% sort()

# Kyne & Aldrich, broken into three subindices
m1 <- out %>%
  filter(label == "socialcap_cat",
         date == mydates[1]) %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
       bonding + bridging + linking + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni,
       weights = "weights", model = "ls")

m2 <- out %>%
  filter(label == "socialcap_cat",
         date == mydates[2]) %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
       bonding + bridging + linking + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni,
       weights = "weights", model = "ls")

m3 <- out %>%
  filter(label == "socialcap_cat",
         date == mydates[3]) %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
       bonding + bridging + linking + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni,
       weights = "weights", model = "ls")

m4 <- out %>%
  filter(label == "socialcap_cat",
         date == mydates[4]) %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
       bonding + bridging + linking + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni,
       weights = "weights", model = "ls")

m5 <- out %>%
  filter(label == "socialcap_cat",
         date == mydates[5]) %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
       bonding + bridging + linking + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni,
       weights = "weights", model = "ls")


m6 <- out %>%
  filter(label == "socialcap_cat",
         date == mydates[6]) %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
       bonding + bridging + linking + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni,
       weights = "weights", model = "ls")

get_linking = function(mymodel){
  mymodel %>%
    setx(linking = seq(from = -2, to = 2, length.out = 50)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    return()
}

mysim <- list(m1,m2,m3,m4,m5,m6) %>%
  map_dfr(~get_linking(.), .id = "date") %>%
  mutate(date = date %>% recode_factor(
    "1" = "2/1 - 4/25",
    "2" = "4/26 - 5/16",
    "3" = "5/17 - 5/23*", 
    "4" = "5/24 - 8/22*", 
    "5" = "8/23 - 8/29", 
    "6" = "8/30 - 9/26"))

#out %>%
#  filter(label == "socialcap_cat") %>%
#  summarize(low = quantile(linking, 0.2),
#            high = quantile(linking, 0.8))
mysim %>% 
  ggplot(mapping = aes(x = linking, y = qi_ci_median, 
                       ymin = qi_ci_min, ymax = qi_ci_max,
                       fill = date, color = date)) +
  geom_hline(yintercept = 0, color = "darkgrey", linetype = "dashed") +
  geom_line() +
  geom_ribbon(alpha = 0.5) +
  facet_wrap(~date, scales = "free", ncol = 3) +
  guides(fill = FALSE, color = FALSE) +
  scale_fill_manual(values = c("#85ECA9", "#648FFF", "#785EF0", "#DC267F", "#FE6100","#FFB000")) +
  scale_color_manual(values = c("#85ECA9", "#648FFF", "#785EF0", "#DC267F", "#FE6100","#FFB000")) +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.25, "cm")) +
  labs(x = "Linking Social Capital (Z-score)",
       y = "Excess Deaths per 100,000 residents\ncompared to 2015-2019 average\n(with simulated 95% confidence interval)") +
  ggsave("viz/figure_simulation.png", dpi = 500, width = 8, height = 5)

```


## Bonding Simulation

```{r}

# Identify main variables of interest
myvars <- c("excess_deaths_1yr_period_rate",
            "excess_deaths_3yr_period_rate", 
            "excess_deaths_5yr_period_rate", 
            "socialcap","bonding", "bridging","linking",
            "sci_psu",
            "sci_usjec",
          "family_unity_usjec",
          "community_health_usjec",
          "collective_efficacy_usjec",
          "institutional_health_usjec",
          "democrat_2016", "employee_muni", 
          "workplaces","pop_density",
          "health_care_capacity", "uninsured", "health_quality",
          "pop_age_65_plus", "pop_female", "pop_black",
          "median_household_income", "some_college")
  

# Import data as unbalanced panel
datun <- read_rds("dataset.rds") %>%
  # Format all numeric variables
  # Rescale each as Z-scores
  mutate_at(vars(workplaces:pop_density),
            funs(scale(.) %>% as.numeric()))  %>%
  mutate(date = factor(date)) %>%
  arrange(fips, date)


# Identify the balanced panel
balanced <- read_rds("dataset.rds") %>% 
  select(date, fips) %>%
  group_by(fips) %>%
  count() %>%
  filter(n == 6) %>%
  select(fips) %>% unlist()


# Filter to balanced panel
dat <- datun %>%
  filter(fips %in% balanced) %>%
  as.data.frame() 

# Write a function to extract matched data
get_matched_data = function(matchmodel){
  dat %>%
    # Join into it
    left_join(by = c("fips"),
              # The coarsened exact matching weights
              y = dat %>%
                filter(date == "2020-04-25") %>%
                # From the specified matched sample
                mutate(weights = matchmodel$weights) %>%
                select(fips, weights)) %>%
    filter(weights > 0) %>% 
    mutate(label = matchmodel$formula[2] %>% as.character() %>% str_remove("_cat[(][)]")) %>%
    return()
}

# Get the matched datasets
datm <- read_rds("analysis/matched_models.rds") %>%
  map_dfr(~get_matched_data(.)) 


out <- bind_rows(
  datm %>%
    select(fips, date, weights, myvars, label) %>%
    mutate(type = "Matched Panel"),
  
  # Get the balanced panel
  dat %>%
    # Zoom into just main cases
    select(fips, date, myvars) %>%
    mutate(type = "Balanced Panel"),
  
  datun %>%
     # Zoom into just main cases
    select(fips, date, myvars) %>%
    mutate(type = "Unbalanced Panel"))

remove(dat, datm, datun)

mydates <- out$date %>% unique() %>% sort()

# Kyne & Aldrich, broken into three subindices
m1 <- out %>%
  filter(label == "socialcap_cat") %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
       bonding + bridging + linking + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni +
         factor(date),
       weights = "weights", model = "ls")

# Let's make a ridiculously detailed simulation plot,
# with 50 observations each
mysim <- bind_rows(
  m1 %>%
    setx(bonding = seq(from = -2, to = 2, length.out = 50)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.90) %>%
    mutate(type = "90%"),
  m1 %>%
    setx(bonding = seq(from = -2, to = 2, length.out = 50)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(type = "95%"),
  m1 %>%
    setx(bonding = seq(from = -2, to = 2, length.out = 50)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.99) %>%
    mutate(type = "99%"),
  m1 %>%
    setx(bonding = seq(from = -2, to = 2, length.out = 50)) %>%
    sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.999) %>%
    mutate(type = "99.9%")) %>%
  mutate(type = factor(type, levels = c("90%", "95%", "99%", "99.9%"))) %>%
  select(bonding, type, qi_ci_median, qi_ci_min, qi_ci_max)


# Here's the simple version - the colors don't work great.
mysim %>% 
  ggplot(mapping = aes(x = bonding, y = qi_ci_median, 
                       ymin = qi_ci_min, ymax = qi_ci_max,
                       fill = type)) +
  geom_hline(yintercept = 0, color = "darkgrey", linetype = "dashed") +
  geom_line(color = "black") +
  geom_ribbon(alpha = 0.5) +
  #scale_fill_manual(values = c("#648FFF", "#FFB000")) +
  #scale_color_manual(values = c("#648FFF", "#FFB000")) +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.25, "cm")) +
  labs(x = "Bonding Social Capital (Z-score)",
       y = "Excess Deaths per 100,000 residents\ncompared to 2015-2019 average\n(with simulated confidence intervals)",
       fill = "Simulated\nConfidence\nIntervals") 


ranks <- mysim %>% 
  pivot_longer(cols = contains("qi_"),
               names_to = "measure",
               values_to = "value") %>%
  filter(measure != "qi_ci_median") %>%
  group_by(bonding) %>%
  arrange(value) %>%
  mutate(order = paste("band", 1:n(), sep = "_")) %>%
  ungroup() %>%
  select(-measure, -type) %>%
  pivot_wider(id_cols = c(bonding),
              names_from = order,
              values_from = value) %>%
  rename(lower_999 = band_1,
         lower_99 = band_2,
         lower_95 = band_3,
         lower_90 = band_4,
         upper_90 = band_5,
         upper_95 = band_6,
         upper_99 = band_7,
         upper_999 = band_8) 

bands <- bind_rows(
  ranks %>%
    select(bonding, lower = lower_999, upper = lower_99) %>%
    mutate(type = "99.9%", version = "1"),
  ranks %>%
    select(bonding, lower = lower_99, upper = lower_95) %>%
    mutate(type = "99%", version = "2"),
  ranks %>%
    select(bonding, lower = lower_95, upper = lower_90) %>%
    mutate(type = "95%", version = "3"),
  ranks %>%
    select(bonding, lower = lower_90, upper = upper_90) %>%
    mutate(type = "90%", version = "4"),
  ranks %>%
    select(bonding, lower = upper_90, upper = upper_95) %>%
    mutate(type = "95%", version = "5"),
  ranks %>%
    select(bonding, lower = upper_95, upper = upper_99) %>%
    mutate(type = "99%", version = "6"),
  ranks %>%
    select(bonding, lower = upper_99, upper = upper_999) %>%
    mutate(type = "99.9%", version = "7")) %>%
  mutate(type = factor(type, levels = c("90%", "95%", "99%", "99.9%")))  %>%
  # Now join back in the median.
  # Note - since you've got so many different bands, you technically have several medians,
  # but they are all virtually identical. 
  # So, we're just going to grab one of them from the 99.9% version.
  left_join(by = c("bonding"), 
            y = mysim %>% select(bonding, type, qi_ci_median) %>%
              filter(type == "99.9%") %>% select(-type)) 


bands %>% 
  ggplot(mapping = aes(x = bonding, y = qi_ci_median, ymin = lower, ymax = upper,
                       group = version, 
                       fill = type)) +
  geom_ribbon(color = "white", size = 0.75) +
  geom_line(color = "white", size = 0.5) +
  viridis::scale_fill_viridis(discrete = TRUE, option = "plasma",
                              begin = 0.1, end = 0.8)  +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.25, "cm")) +
  labs(x = "Bonding Social Capital (Z-score)",
       y = "Excess Deaths per 100,000 residents\ncompared to 2015-2019 average\n(with simulated confidence intervals)",
       fill = "Simulated\nConfidence\nIntervals") +
  ggsave("viz/figure_simulation_bonding.png", dpi = 500, width = 8, height = 5)
```


## T-tests

```{r}

# Identify main variables of interest
myvars <- c("excess_deaths_1yr_period_rate",
            "excess_deaths_3yr_period_rate", 
            "excess_deaths_5yr_period_rate", 
            "socialcap","bonding", "bridging","linking",
            "sci_psu",
            "sci_usjec",
          "family_unity_usjec",
          "community_health_usjec",
          "collective_efficacy_usjec",
          "institutional_health_usjec",
          "democrat_2016", "employee_muni", 
          "workplaces","pop_density",
          "health_care_capacity", "uninsured", "health_quality",
          "pop_age_65_plus", "pop_female", "pop_black",
          "median_household_income", "some_college")
  

# Import data as unbalanced panel
datun <- read_rds("dataset.rds") %>%
  # Format all numeric variables
  # Rescale each as Z-scores
  mutate_at(vars(workplaces:pop_density),
            funs(scale(.) %>% as.numeric()))  %>%
  mutate(date = factor(date)) %>%
  arrange(fips, date)


# Identify the balanced panel
balanced <- read_rds("dataset.rds") %>% 
  select(date, fips) %>%
  group_by(fips) %>%
  count() %>%
  filter(n == 6) %>%
  select(fips) %>% unlist()


# Filter to balanced panel
dat <- datun %>%
  filter(fips %in% balanced) %>%
  as.data.frame() 

# Write a function to extract matched data
get_matched_data = function(matchmodel){
  dat %>%
    # Join into it
    left_join(by = c("fips"),
              # The coarsened exact matching weights
              y = dat %>%
                filter(date == "2020-04-25") %>%
                # From the specified matched sample
                mutate(weights = matchmodel$weights) %>%
                select(fips, weights)) %>%
    filter(weights > 0) %>% 
    mutate(label = matchmodel$formula[2] %>% as.character() %>% str_remove("_cat[(][)]")) %>%
    return()
}

# Get the matched datasets
datm <- read_rds("analysis/matched_models.rds") %>%
  map_dfr(~get_matched_data(.)) 


out <- bind_rows(
  datm %>%
    filter(label %in% c("socialcap_cat", "all_cat", "bonding_cat")) %>%
    select(fips, date, weights, myvars, label, contains("_cat")) %>%
    mutate(type = label),
  
  # Get the balanced panel
  dat %>%
    # Zoom into just main cases
    select(fips, date, myvars,contains("_cat")) %>%
    mutate(type = "Balanced Panel"),
  
  datun %>%
     # Zoom into just main cases
    select(fips, date, myvars,contains("_cat")) %>%
    mutate(type = "Unbalanced Panel"))

remove(dat, datm, datun)

mydates <- out$date %>% unique() %>% sort()
out$type %>% unique()

get_t_test = function(mydata){
  
  mydiff <- mydata %>%
    group_by(value) %>%
    summarize(mean = mean(excess_deaths_5yr_period_rate, na.rm = TRUE)) %>%
    ungroup() %>%
    pivot_wider(names_from = value, values_from = mean) %>%
    mutate(diff = `1` - `0`)

  mytest <- mydata %>%
    infer::t_test(formula = excess_deaths_5yr_period_rate ~ value, 
                  order = c("1", "0"),
                  weights = weights)
  
  mytest %>%
    mutate(diff = mydiff$diff) %>%
    return()
}

# Run t-tests
out %>%
  mutate(weights = if_else(!is.na(weights), 1, weights)) %>%
  pivot_longer(cols = c(bonding_cat, bridging_cat, linking_cat),
               names_to = "measure", values_to = "value") %>%
  mutate(run = paste(type, measure, sep = "-")) %>%
  split(.$run) %>%
  map_dfr(~get_t_test(.), .id = "run") %>%
  separate(run, into = c("type", "measure"), sep = "-") %>%
    mutate(type = type %>% recode_factor(
    "bonding_cat" = "Matched for\nBonding\nSocial Capital",
    "all_cat" = "Matched for\nAll Social Capital\nSubtypes",
    "socialcap_cat" = "Matched for\nOverall\nSocial Capital",
    "Balanced Panel" = "Balanced Panel",
    "Unbalanced Panel" = "Unbalanced Panel") %>%
      factor(levels = levels(.) %>% rev())) %>%
  mutate(measure = measure %>% recode_factor(
    "bonding_cat" = "Bonding\nSocial Capital",
    "bridging_cat" = "Bridging\nSocial Capital",
    "linking_cat"= "Linking\nSocial Capital") %>% 
      factor(levels = levels(.) %>% rev())) %>%
  ggplot(mapping = aes(x = measure, y = diff, ymin = lower_ci, ymax = upper_ci,
                       color = measure, label = round(diff, 1))) +
  geom_hline(yintercept = 0, color = "grey", linetype = "dashed") +
  geom_linerange(size = 1.15) +
  geom_point(size = 3) +
  geom_point(size = 2, color = "white") +
  geom_text(nudge_x = 0.25) +
  coord_flip() +
  facet_grid(~type, scales = "free") +
  guides(color = FALSE) +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.25, "cm"),
        plot.caption = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5)) +
  scale_color_manual(values = c("#648FFF", "#DC267F", "#FFB000")) +
  labs(x = NULL, 
       y = "Difference in Mean Excess Death Rate\n(in Deaths per 100,000 residents)",
       caption = "Displays difference in mean excess death rates for US counties\nwith social capital above the median minus counties below the median in each time period.") +
  ggsave("viz/figure_diff.png", dpi = 500, width = 10, height= 3.5)
```


## Map

```{r}
library(tidyverse)
library(sf)
library(rgdal)
library(tigris)


states <- tigris::states(year = 2019, cb = T) %>%
  st_as_sf() %>%
  filter(STUSPS %in% state.abb) %>%
  filter(!STUSPS %in% c("AK", "HI", "PR") )

counties <- tigris::counties(year = 2019, cb = TRUE) %>%
  st_as_sf() %>%
  dplyr::select(name = NAME, fips = GEOID, geometry) %>%
  filter(str_sub(fips,1,2) %in% states$STATEFP) %>%
  # join in some county traits
  left_join(by = "fips",y = read_rds("dataset.rds") %>%
              dplyr::select(fips, date, excess_deaths_5yr_period_rate) %>%
              group_by(fips) %>%
              summarize(date = min(as.Date(date)),
                        peak_rate = max(excess_deaths_5yr_period_rate))) %>%
  ungroup() %>%
  mutate(date = date %>% as.character() %>% dplyr::recode(
    "2020-04-25" = "2/1 - 4/25", 
    "2020-05-16" = "4/26 - 5/16", 
    "2020-05-23" = "5/17 - 5/23", 
    "2020-08-22" =  "5/24 - 8/22", 
    "2020-08-29" = "8/23 - 8/29",
    "2020-09-26" = "8/30 - 9/26",
    .missing = "Not represented") %>%
      factor(levels = c("2/1 - 4/25", "4/26 - 5/16","5/17 - 5/23", 
                        "5/24 - 8/22","8/23 - 8/29","8/30 - 9/26",
                        "Not represented"))) %>%
  mutate(mycolor = if_else(date == "Not represented", "not represented", "represented"))


ggplot() +
  geom_sf(data = states, color = "lightgrey", size = 5) +
  geom_sf(data = counties, mapping = aes(fill = date, color = mycolor), 
          size = 0.1) +
  geom_sf(data = counties %>% filter(date != "Not represented"), 
          fill = NA, color = "black", size = 0.1, show.legend = TRUE) + 
  geom_sf(data = states, fill = NA, color = "black", size = 0.5) +
  scale_fill_manual(values = c("#648FFF", "#785EF0", "#DC267F", 
                               "#FE6100", "#FFB000", "darkgrey"))  +
  scale_color_manual(values = c("lightgrey", "black")) +
  theme_void(base_size = 14) +
  theme(plot.caption = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5)) +
  guides(color = FALSE) +
  labs(fill = "Counties by\nFirst Period\nReported",
       subtitle = "Counties observed in original Unbalanced Panel Dataset",
       caption = "Note: CDC reported counties after at least 1 resident\nwas identified as having died of COVID-19.") +
  ggsave("viz/figure_map.png", dpi = 500, height = 5, width = 8)

```

## Sample vs. Population

```{r}

# Identify main variables of interest
myvars <- c("socialcap","bonding", "bridging","linking",
            "sci_psu",
            "sci_usjec",
          "family_unity_usjec",
          "community_health_usjec",
          "collective_efficacy_usjec",
          "institutional_health_usjec",
          "democrat_2016", "employee_muni", 
          "workplaces","pop_density",
          "health_care_capacity", "uninsured", "health_quality",
          "pop_age_65_plus", "pop_female", "pop_black",
          "median_household_income", "some_college")
  


# Identify the balanced panel
balanced <- read_rds("dataset.rds") %>% 
  select(date, fips) %>%
  group_by(fips) %>%
  count() %>%
  filter(n == 6) %>%
  select(fips) %>% unlist()

dat <- bind_rows(
  # Get unbalanced panel
  read_rds("dataset_unimputed.rds") %>%
    select(fips, date, myvars) %>%
    mutate(type = "Unbalanced Panel"),
  # Get balanced panel
  read_rds("dataset_unimputed.rds") %>%
    select(fips, date, myvars) %>%
    filter(fips %in% balanced) %>%
    mutate(type = "Balanced Panel"),
  # Get population
  read_rds("dataset_pop_unimputed.rds") %>%
    select(fips, date, myvars) %>%
    mutate(type = "Population")
) %>%
  # Multiply percentages to 100
  mutate_at(vars(pop_black, pop_age_65_plus, pop_female,
                 some_college, democrat_2016,uninsured),
            funs(. * 100)) %>%
  mutate(median_household_income = median_household_income / 1000,
         pop_density = log(pop_density / 100) ) %>%
  # Pivot longer
  pivot_longer(cols = -c(fips, date, type),
               names_to = "measure", values_to = "value") %>%
  mutate(measure = measure %>% recode_factor(
    # Social Capital Effects
    "socialcap" = "Social Capital",
    "sci_psu" = "Social Capital\n(Rupasingha et al.)",
    "sci_usjec" = "Social Capital\n(USJEC)",
    # types
    "bonding" = "Bonding\nSocial Capital",
    "bridging" = "Bridging\nSocial Capital",
    "linking" = "Linking\nSocial Capital",
    # other types
    "family_unity_usjec" = "Family\nUnity",
    "community_health_usjec" = "Community\nHealth",
    "collective_efficacy_usjec" = "Collective\nEfficacy",
    "institutional_health_usjec" = "Institutional\nHealth",
    # Mobility
    "workplaces" = "Avg. % Change\nWorkplace Mobility",
    # Governance
    "democrat_2016" = "% Voted Democrat\n2016 Pres. Election",
    "employee_muni" = "Municipal Employees\nper 1000 residents",
    # Health Care Capacity
    "health_care_capacity" = "Health Care\nCapacity Index",
    "uninsured" = "% Uninsured",
    # Health Conditions
    "health_quality" = "Quality of Health",
    # Socioeconomics
    "median_household_income" = "Median Household\nIncome (thousands)",
    "some_college" = "% Some college or more",
    # Demographics
    "pop_age_65_plus" = "% Age 65 or over",
    "pop_female" = "% Women",
    "pop_black" = "% Black",
    "pop_density" = "Pop. Density (100s\nper sq.km. (log))")) %>%
  mutate(type = factor(type, levels = c(
    "Population", "Unbalanced Panel", "Balanced Panel")))


dat %>% 
  ggplot(mapping = aes(x = "", y = value, fill = type, color = type)) +
  geom_violin(alpha = 0.5, draw_quantiles = 0.5) +
  coord_flip() +
  facet_wrap(~measure, scales = "free", ncol = 6) +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.25, "cm"),
        legend.position = "bottom",
        axis.ticks = element_blank(),
        plot.subtitle = element_text(hjust = 0.5)) +
  scale_fill_manual(values = c("#648FFF", "#DC267F", "#FFB000")) +
  scale_color_manual(values = c("#648FFF", "#DC267F", "#FFB000")) +
  guides(color = FALSE) +
  labs(subtitle = "Covariate Distributions",
       y = NULL, x = NULL, fill = "Sample") +
  ggsave("viz/figure_pop_sample.png", dpi = 500, width = 11, height = 7)
```


## Comparison of Models

```{r}
library(ggeffects)


# Identify main variables of interest
myvars <- c("excess_deaths_1yr_period_rate",
            "excess_deaths_3yr_period_rate", 
            "excess_deaths_5yr_period_rate", 
            "socialcap","bonding", "bridging","linking",
            "sci_psu",
            "sci_usjec",
          "family_unity_usjec",
          "community_health_usjec",
          "collective_efficacy_usjec",
          "institutional_health_usjec",
          "democrat_2016", "employee_muni", 
          "workplaces","pop_density",
          "health_care_capacity", "uninsured", "health_quality",
          "pop_age_65_plus", "pop_female", "pop_black",
          "median_household_income", "some_college")
  

# Import data as unbalanced panel
datun <- read_rds("dataset.rds") %>%
  # Format all numeric variables
  # Rescale each as Z-scores
  mutate_at(vars(workplaces:pop_density),
            funs(scale(.) %>% as.numeric()))  %>%
  mutate(date = factor(date)) %>%
  arrange(fips, date)


# Identify the balanced panel
balanced <- read_rds("dataset.rds") %>% 
  select(date, fips) %>%
  group_by(fips) %>%
  count() %>%
  filter(n == 6) %>%
  select(fips) %>% unlist()


# Filter to balanced panel
dat <- datun %>%
  filter(fips %in% balanced) %>%
  as.data.frame() 

# Write a function to extract matched data
get_matched_data = function(matchmodel){
  dat %>%
    # Join into it
    left_join(by = c("fips"),
              # The coarsened exact matching weights
              y = dat %>%
                filter(date == "2020-04-25") %>%
                # From the specified matched sample
                mutate(weights = matchmodel$weights) %>%
                select(fips, weights)) %>%
    filter(weights > 0) %>% 
    mutate(label = matchmodel$formula[2] %>% as.character() %>% str_remove("_cat[(][)]")) %>%
    return()
}

# Get the matched datasets
datm <- read_rds("analysis/matched_models.rds") %>%
  map_dfr(~get_matched_data(.)) 


out <- bind_rows(
  datm %>%
    select(fips, date, weights, myvars, label) %>%
    mutate(type = "Matched Panel"),
  
  # Get the balanced panel
  dat %>%
    # Zoom into just main cases
    select(fips, date, myvars) %>%
    mutate(type = "Balanced Panel"),
  
  datun %>%
     # Zoom into just main cases
    select(fips, date, myvars) %>%
    mutate(type = "Unbalanced Panel"))

remove(dat, datm, datun)

mydates <- out$date %>% unique() %>% sort()

# Kyne & Aldrich, broken into three subindices
m1 <- out %>%
  filter(label == "bonding_cat") %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
       bonding + bridging + linking + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni +
         factor(date), model = "ls", weights = "weights")

library(ZeligMultilevel)
m2 <- zlsmixed$new()
m2 <- out %>%
  filter(label == "bonding_cat") %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
       bonding + bridging + linking + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni +
        (1 | date), weights = weights, model = "ls.mixed")

m3 <- out %>%
  filter(label == "bonding_cat") %>%
  zelig(formula = excess_deaths_5yr_period_rate ~ 
       bonding + bridging + linking + 
       democrat_2016 + workplaces +  
       pop_density +  health_care_capacity + uninsured + health_quality +
       pop_age_65_plus +  pop_female +  pop_black + #pop_hisp + 
       median_household_income + some_college + employee_muni,
       weights = "weights", model = "normal.gee", id = "fips", 
       corstr = "exchangeable")


# Let's write a quick function to use ZeligMultilevel
get_mixed_sim = function(value){
  myzelig <- m2
  myzelig$setx(bonding = value)
  myzelig$sim(num = 1000)
  
  data.frame(ev = myzelig$sim.out$x$ev %>% unlist(),
             bonding = value) %>%
    return()
}
# Extract main variables
m2out <- data.frame(value = seq(from = -2, to = 2, length.out = 30)) %>%
  split(.$value) %>%
  map_dfr(~get_mixed_sim(.$value))
detach("ZeligMultilevel", unload = TRUE)

library(Zelig)

# 90%
mysim90 <- bind_rows(
  m1 %>%
    setx(bonding = seq(from = -2, to = 2, length.out = 30)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.90) %>%
    mutate(model = "Fixed\nEffects\nModel"),
  m2out %>%
     group_by(bonding) %>%
    summarize(qi_ci_median = median(ev),
              qi_ci_min = quantile(ev, 0.5),
              qi_ci_max = quantile(ev, 0.95)) %>%
    ungroup() %>%
    mutate(model = "Random\nEffects\nModel"),
  m3 %>% 
    setx(bonding = seq(from = -2, to = 2, length.out = 30)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.90) %>%
    mutate(model = "Generalized Estimation\nEquation Model\n(Exchangeable)"))
 
 
# 95% 
mysim95 <- bind_rows(
  m1 %>%
    setx(bonding = seq(from = -2, to = 2, length.out = 30)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(model = "Fixed\nEffects\nModel"),
  m2out %>%
     group_by(bonding) %>%
    summarize(qi_ci_median = median(ev),
              qi_ci_min = quantile(ev, 0.025),
              qi_ci_max = quantile(ev, 0.975)) %>%
    ungroup() %>%
    mutate(model = "Random\nEffects\nModel"),
  m3 %>% 
    setx(bonding = seq(from = -2, to = 2, length.out = 30)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.95) %>%
    mutate(model = "Generalized Estimation\nEquation Model\n(Exchangeable)"))

# 99%
mysim99 <- bind_rows(
  m1 %>%
    setx(bonding = seq(from = -2, to = 2, length.out = 30)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.99) %>%
    mutate(model = "Fixed\nEffects\nModel"),
  m2out %>%
     group_by(bonding) %>%
    summarize(qi_ci_median = median(ev),
              qi_ci_min = quantile(ev, 0.01),
              qi_ci_max = quantile(ev, 0.99)) %>%
    ungroup() %>%
    mutate(model = "Random\nEffects\nModel"),
  m3 %>% 
    setx(bonding = seq(from = -2, to = 2, length.out = 30)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.99) %>%
    mutate(model = "Generalized Estimation\nEquation Model\n(Exchangeable)"))

# 99.9%
mysim999 <- bind_rows(
  m1 %>%
    setx(bonding = seq(from = -2, to = 2, length.out = 30)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.999) %>%
    mutate(model = "Fixed\nEffects\nModel"),
  m2out %>%
     group_by(bonding) %>%
    summarize(qi_ci_median = median(ev),
              qi_ci_min = quantile(ev, 0.001),
              qi_ci_max = quantile(ev, 0.999)) %>%
    ungroup() %>%
    mutate(model = "Random\nEffects\nModel"),
  m3 %>% 
    setx(bonding = seq(from = -2, to = 2, length.out = 30)) %>%
    Zelig::sim() %>%
    zelig_qi_to_df() %>%
    qi_slimmer(qi_type = "ev", ci = 0.999) %>%
    mutate(model = "Generalized Estimation\nEquation Model\n(Exchangeable)"))

# Unfortunately, Zelig and ZeligMultilevel have some conflicts,
# so you might need to restart R and reload Zelig in order to keep using Zelig later

# Apparently ZeligMultilevel loaded the MASS package, 
# making it impossible to use Dplyr's select funciton
# Let's load it in manually
select <- dplyr::select

# Combine
mysim <- bind_rows(mysim90 %>% mutate(type = "90%"),
                   mysim95 %>% mutate(type = "95%"),
                   mysim99 %>% mutate(type = "99%"),
                   mysim999 %>% mutate(type = "99.9%")) %>%
  dplyr::select(bonding, type, contains("qi"), model)

ranks <- mysim %>% 
  pivot_longer(cols = contains("qi_"),
               names_to = "measure",
               values_to = "value")  %>%
  filter(measure != "qi_ci_median") %>%
  group_by(model, bonding) %>%
  arrange(value) %>%
  mutate(order = paste("band", 1:n(), sep = "_")) %>%
  ungroup() %>%
  dplyr::select(-measure, -type) %>%
  pivot_wider(id_cols = c(model, bonding),
              names_from = order,
              values_from = value) %>%
  rename(lower_999 = band_1,
         lower_99 = band_2,
         lower_95 = band_3,
         lower_90 = band_4,
         upper_90 = band_5,
         upper_95 = band_6,
         upper_99 = band_7,
         upper_999 = band_8) 

bands <- bind_rows(
  ranks %>%
    select(model, bonding, lower = lower_999, upper = lower_99) %>%
    mutate(type = "99.9%", version = "1"),
  ranks %>%
    select(model, bonding, lower = lower_99, upper = lower_95) %>%
    mutate(type = "99%", version = "2"),
  ranks %>%
    select(model, bonding, lower = lower_95, upper = lower_90) %>%
    mutate(type = "95%", version = "3"),
  ranks %>%
    select(model, bonding, lower = lower_90, upper = upper_90) %>%
    mutate(type = "90%", version = "4"),
  ranks %>%
    select(model, bonding, lower = upper_90, upper = upper_95) %>%
    mutate(type = "95%", version = "5"),
  ranks %>%
    select(model, bonding, lower = upper_95, upper = upper_99) %>%
    mutate(type = "99%", version = "6"),
  ranks %>%
    select(model, bonding, lower = upper_99, upper = upper_999) %>%
    mutate(type = "99.9%", version = "7")) %>%
  mutate(type = factor(type, levels = c("90%", "95%", "99%", "99.9%")))  %>%
  # Now join back in the median.
  # Note - since you've got so many different bands, you technically have several medians,
  # but they are all virtually identical. 
  # So, we're just going to grab one of them from the 99.9% version.
  left_join(by = c("model", "bonding"), 
            y = mysim %>% select(model, bonding, type, qi_ci_median) %>%
              filter(type == "99.9%") %>% select(-type)) 



bands %>%
  mutate(model = factor(model, levels = c("Fixed\nEffects\nModel",
                                        "Random\nEffects\nModel",
                                        "Generalized Estimation\nEquation Model\n(Exchangeable)"))) %>%
  ggplot(mapping = aes(x = bonding, y = qi_ci_median, 
                       ymin = lower, ymax = upper,group = version,
                       fill = type, color = type)) +
  geom_line(color = "white", size = 0.5) +
  geom_ribbon(color = "white", size = 0.5) +
  facet_wrap(~model)  +
  guides(color = FALSE) +
  viridis::scale_fill_viridis(discrete = TRUE, option = "plasma",
                              begin = 0.1, end = 0.8)  +
    viridis::scale_color_viridis(discrete = TRUE, option = "plasma",
                              begin = 0.1, end = 0.8)  +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        panel.spacing = unit(0.25, "cm"), legend.position = "right") +
  labs(x = "Bonding Social Capital (Z-score)",
       y = "Excess Deaths per 100,000 residents\ncompared to 2015-2019 average\n(with 95% simulated confidence intervals)",
       fill = "Simulated\nConfidence\nIntervals") +
  ggsave("viz/figure_simulation_compare.png", dpi = 500, width = 9, height = 5)
```
